/****************************************************************************
 * libs/libc/machine/arm/armv7-m/gnu/arch_strcpy.S
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership. The
 * ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 ****************************************************************************/

#include "libc.h"

#ifdef LIBC_BUILD_STRCPY

/* This strcpy borrows some ideas from arch_strcmp.S. */

/* Parameters and result. */

#define dst        r0
#define src        r1
#define result     r0

/* Internal variables (callee-saved registers) */

#define tmp1       r4
#define tmp2       r5
#define tmp3       r6
#define src_offset r7

#ifdef __ARM_BIG_ENDIAN
# define MASK_0       0xff000000
# define MASK_1       0xff0000
# define MASK_2       0xff00
# define MASK_3       0xff
# define BYTE_0_SHIFT 24
# define BYTE_1_SHIFT 16
# define BYTE_2_SHIFT 8
# define BYTE_3_SHIFT 0
#else
# define MASK_0       0xff
# define MASK_1       0xff00
# define MASK_2       0xff0000
# define MASK_3       0xff000000
# define BYTE_0_SHIFT 0
# define BYTE_1_SHIFT 8
# define BYTE_2_SHIFT 16
# define BYTE_3_SHIFT 24
#endif
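
/* Byte n above means the n-th byte in memory order of a loaded word:
 * on little-endian targets that is bits 8n..8n+7, on big-endian targets
 * the mirrored byte lane.  So, for example, (word & MASK_1) >> BYTE_1_SHIFT
 * extracts the second string byte held in a word on either endianness.
 */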

  .syntax  unified
  .text
  .section .text.strcpy
  .align   2
  .global  strcpy
  .thumb
  .type    strcpy, %function

strcpy:
  push    {result, tmp1, tmp2, tmp3, src_offset}
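  /* result (r0) is saved so the original dst pointer can be popped back
   * into r0 and returned unchanged; r4-r7 are callee-saved registers used
   * as scratch.
   */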
  eor     tmp1, dst, src
  tst     tmp1, #3
  /* If dst and src are not at the same byte offset from a word boundary */
  bne     .Lstrs_diff_offset

  /* Same byte offset: get the offset within the word */
  ands    tmp1, src, #3
  beq     .Ldst_src_aligned
  /* Get the number of bytes until src is aligned */
  rsb     tmp1, #4
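  /* tmp1 now holds 4 - (src & 3): the number of bytes to copy one at a
   * time before both pointers reach a word boundary.
   */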

.Lbyte_copy_until_dst_src_aligned:
  ldrb    tmp2, [src], #1
  cmp     tmp2, #0
  beq     .Lcopy_done
  strb    tmp2, [dst], #1
  subs    tmp1, #1
  bne     .Lbyte_copy_until_dst_src_aligned
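
/* The aligned loop below copies a word at a time, using the classic
 * word-at-a-time zero test: (w - 0x01010101) & ~w & 0x80808080 is
 * non-zero if and only if some byte of w is 0x00.  For a zero byte the
 * subtraction borrows into bit 7 while ~w keeps that bit set; for a
 * non-zero byte, bit 7 of (b - 1) can only be set when b >= 0x80, in
 * which case ~b has it clear.  For example, the little-endian word
 * 0x00636261 ("abc" plus its terminator) yields 0x80000000 and is caught.
 */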
.Ldst_src_aligned:
  /* Now dst and src are word aligned: copy a word per iteration */
  ldr     tmp1, [src], #4
  sub     tmp2, tmp1, #0x01010101
  bic     tmp2, tmp1
  tst     tmp2, #0x80808080
  /* Zero flags (no 0x80 bit set) means no zero byte was found */
  it      eq
  streq   tmp1, [dst], #4
  beq     .Ldst_src_aligned

  /* There is a zero byte in the word: rewind src and copy byte by byte */
  sub     src, #4
.Lbyte_copy_until_zero:
  ldrb    tmp2, [src], #1
  cmp     tmp2, #0
  beq     .Lcopy_done
  strb    tmp2, [dst], #1
  b       .Lbyte_copy_until_zero

/* Make dst aligned first, so we never write before dst.  Writing a word
 * that starts before dst would require an atomic read-modify-write to
 * preserve the bytes we must not touch, which complicates things, so the
 * solution here is to copy byte by byte until dst is aligned.
 */

.Lstrs_diff_offset:
  ands    tmp1, dst, #3
  beq     .Ldiff_offset_loop_begin
  /* Get the number of bytes until dst is aligned */
  rsb     tmp1, #4

.Lbyte_copy_until_dst_aligned:
  ldrb    tmp2, [src], #1
  cmp     tmp2, #0
  beq     .Lcopy_done
  strb    tmp2, [dst], #1
  subs    tmp1, #1
  bne     .Lbyte_copy_until_dst_aligned

.Ldiff_offset_loop_begin:
  /* src_offset must not be 0 here (dst and src are at different offsets) */
  and     src_offset, src, #3
  lsls    src_offset, #3
  bic     src, #3

  /* First word: the stale bytes before the real string start are filled
   * with 0xff so the zero-byte test cannot trigger on them.  Only the
   * first word needs this treatment.
   */
  ldr     tmp1, [src], #4
  mov     tmp2, #0xffffffff
  rsb     tmp3, src_offset, #32
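  /* src_offset now holds the offset in bits (8, 16 or 24) and tmp3 holds
   * 32 - src_offset.  For example, on a little-endian target with a
   * one-byte offset, tmp2 becomes 0xffffffff >> 24 = 0x000000ff below,
   * which ORed into tmp1 forces the single stale byte to 0xff.
   */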

#ifdef __ARM_BIG_ENDIAN
  lsls    tmp2, tmp3
#else
  lsrs    tmp2, tmp3
#endif
  orr     tmp1, tmp1, tmp2
  /* Test whether the first word contains a zero byte */
  sub     tmp3, tmp1, #0x01010101
  bic     tmp3, tmp1
  tst     tmp3, #0x80808080
  /* Non-zero means a zero byte was detected */
  bne     .Ltail_copy

  /* Before the loop, set tmp2 = tmp1 to simplify the logic in the loop */
  mov     tmp2, tmp1
.Ldiff_offset_loop:
  mov     tmp1, tmp2
  ldr     tmp2, [src], #4
  /* Test whether the new word contains a zero byte */
  sub     tmp3, tmp2, #0x01010101
  bic     tmp3, tmp2
  tst     tmp3, #0x80808080
  /* Non-zero means a zero byte was detected */
  bne     .Ltail_copy
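
  /* The loop keeps a two-word window: tmp1 is the previous word (its
   * leading src_offset/8 bytes have already been stored), tmp2 is the
   * word just loaded.  Each iteration stores the remaining bytes of tmp1
   * followed by the leading bytes of tmp2, so dst advances one full
   * aligned word while src stays word aligned.
   */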
  /* Now let's fill dst */
#ifdef __ARM_BIG_ENDIAN
  lsls    tmp1, src_offset
  rsb     tmp3, src_offset, #32
  lsrs    tmp3, tmp2, tmp3
  orr     tmp1, tmp1, tmp3
#else
  lsrs    tmp1, src_offset
  rsb     tmp3, src_offset, #32
  lsls    tmp3, tmp2, tmp3
  orr     tmp1, tmp1, tmp3
#endif
  str     tmp1, [dst], #4
  b       .Ldiff_offset_loop
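
/* On entry to .Ltail_copy, tmp1 holds the word whose bytes from
 * src_offset onwards still need to be copied (its leading bytes were
 * either stored by the previous iteration or are the 0xff padding of the
 * first word), and tmp2 holds the newly loaded word containing the
 * terminating zero.  On the first-word path tmp2 still holds the 0xff
 * mask, but in that case the zero is always found in tmp1 before tmp2
 * is examined.
 */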
.Ltail_copy:
  cmp     src_offset, #24
  beq     .Loffset_3
  cmp     src_offset, #16
  beq     .Loffset_2
  /* src_offset == 8 here */
  ands    tmp3, tmp1, MASK_1
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_1_SHIFT
  strb    tmp3, [dst], #1
.Loffset_2:
  ands    tmp3, tmp1, MASK_2
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_2_SHIFT
  strb    tmp3, [dst], #1
.Loffset_3:
  ands    tmp3, tmp1, MASK_3
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_3_SHIFT
  strb    tmp3, [dst], #1
  ands    tmp3, tmp2, MASK_0
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_0_SHIFT
  strb    tmp3, [dst], #1
  ands    tmp3, tmp2, MASK_1
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_1_SHIFT
  strb    tmp3, [dst], #1
  ands    tmp3, tmp2, MASK_2
  beq     .Lcopy_done
  lsrs    tmp3, BYTE_2_SHIFT
  strb    tmp3, [dst], #1
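  /* If we fall through to here, byte 3 of tmp2 must be the terminating
   * zero, so only the final NUL store remains.
   */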
.Lcopy_done:
  mov     tmp3, #0
  strb    tmp3, [dst]
  pop     {result, tmp1, tmp2, tmp3, src_offset}
  bx      lr

#if 0
/* Pseudo code of strcpy when dst and src are not at the same byte offset */

/* Make dst aligned first, so we never write before dst.  Writing a word
 * that starts before dst would require an atomic read-modify-write to
 * preserve the bytes we must not touch, which complicates things, so the
 * solution here is to copy byte by byte until dst is aligned.
 */
if ((dst & 3) == 0)
  goto diff_offset_loop_begin;
ByteCopyUntilDstAligned();

.diff_offset_loop_begin:
/* src_offset mustn't be 0 here */
src_offset = src & 3;
src_offset = src_offset * 8;
src = src & 0xfffffffc;
tmp1 = *src;
src += 4;
/* First word logic:
 * prepend 0xff to make the algorithm simpler;
 * only the first word needs to be prepended.
 */
if (src_offset != 0)
{
  tmp2 = 0xffffffff;
#if big endian
  tmp2 = tmp2 << (32 - src_offset);
#else
  tmp2 = tmp2 >> (32 - src_offset);
#endif
  tmp1 |= tmp2;
}
if (HasZeroByte(tmp1))
{
  goto .tail_copy;
}
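
/* HasZeroByte() is not defined above; a minimal sketch of what it stands
 * for (the same word-at-a-time zero test the assembly performs).  The
 * name and signature are illustrative only.
 */
static inline int HasZeroByte(uint32_t w)
{
  /* Bit 7 survives in a byte lane only if that byte of w is 0x00 */
  return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
}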

/* Before the loop, set tmp2 = tmp1 to simplify the logic in the loop */
tmp2 = tmp1;
.diff_offset_loop:
tmp1 = tmp2;
tmp2 = *src;
src += 4;

/* A zero in the new word means we have to tail-copy from both tmp1 and
 * tmp2 to dst
 */
if (HasZeroByte(tmp2))
{
  goto .tail_copy;
}
/* Now let's fill dst */
#if big endian
tmp1 = tmp1 << src_offset;
tmp1 |= tmp2 >> (32 - src_offset);
*dst = tmp1;
#else
tmp1 = tmp1 >> src_offset;
tmp1 |= tmp2 << (32 - src_offset);
*dst = tmp1;
#endif
dst += 4;
goto .diff_offset_loop;

/* Byte by byte copy at the tail */
.tail_copy:
if (src_offset == 24)
  goto offset_3;
if (src_offset == 16)
  goto offset_2;

/* src_offset mustn't be 0 here */
/* default: src_offset == 8 (a one-byte offset) */
if ((tmp1 & MASK_1) == 0)
  goto cpy_done;
*dst++ = (tmp1 & MASK_1) >> BYTE_1_SHIFT;
offset_2:
if ((tmp1 & MASK_2) == 0)
  goto cpy_done;
*dst++ = (tmp1 & MASK_2) >> BYTE_2_SHIFT;
offset_3:
if ((tmp1 & MASK_3) == 0)
  goto cpy_done;
*dst++ = (tmp1 & MASK_3) >> BYTE_3_SHIFT;
if ((tmp2 & MASK_0) == 0)
  goto cpy_done;
*dst++ = (tmp2 & MASK_0) >> BYTE_0_SHIFT;
if ((tmp2 & MASK_1) == 0)
  goto cpy_done;
*dst++ = (tmp2 & MASK_1) >> BYTE_1_SHIFT;
if ((tmp2 & MASK_2) == 0)
  goto cpy_done;
*dst++ = (tmp2 & MASK_2) >> BYTE_2_SHIFT;
/* tmp2's byte 3 must be zero here */

.cpy_done:
*dst++ = 0;
#endif /* Pseudo code end */

#endif