arch_memcpy: Optimize arch memcpy for armv7-m and armv8-m
Use the ldm and stm instructions to improve performance when both src and dst are 32-bit aligned. Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
This commit is contained in:
parent
d563717827
commit
3625385541
@ -66,12 +66,18 @@
|
||||
/* Unroll helpers for the big-block copy loops, selected by
 * __OPT_BIG_BLOCK_SIZE (16, 32 or 64 bytes; anything else is rejected
 * with #error at preprocessing time).
 *
 *   BEGIN_UNROLL_BIG_BLOCK     opens an .irp over one offset per 32-bit
 *                              word of the block (0, 4, 8, ...).
 *   BEGIN_UNROLL_BIG_BLOCK_X4  opens an .irp over one offset per 16 bytes
 *                              of the block (0, 16, ...), for the aligned
 *                              ldmia/stmia loop that moves four registers
 *                              at a time.
 *
 * Each macro only opens the repeat block; it is presumably closed by
 * END_UNROLL, which is defined outside this excerpt.
 */
#if __OPT_BIG_BLOCK_SIZE == 16
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0
|
||||
#elif __OPT_BIG_BLOCK_SIZE == 32
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12,16,20,24,28
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0,16
|
||||
#elif __OPT_BIG_BLOCK_SIZE == 64
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0,16,32,48
|
||||
#else
|
||||
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
||||
#endif
|
||||
@ -113,6 +119,21 @@ memcpy:
|
||||
ands r3, r3, #3
|
||||
bne .Lmisaligned_copy
|
||||
|
||||
.Lbig_aligned:
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
blo .Lmid_block
|
||||
|
||||
stmfd sp!, {r4-r7}
|
||||
.Lbig_aligned_loop:
|
||||
BEGIN_UNROLL_BIG_BLOCK_X4
|
||||
ldmia r1!, {r4, r5, r6, r7}
|
||||
stmia r0!, {r4, r5, r6, r7}
|
||||
END_UNROLL
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
bhs .Lbig_aligned_loop
|
||||
ldmfd sp!, {r4-r7}
|
||||
b .Lmid_block
|
||||
|
||||
.Lbig_block:
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
blo .Lmid_block
|
||||
|
@ -69,12 +69,18 @@
|
||||
/* Unroll helpers for the big-block copy loops, selected by
 * __OPT_BIG_BLOCK_SIZE (16, 32 or 64 bytes; anything else is rejected
 * with #error at preprocessing time).
 *
 *   BEGIN_UNROLL_BIG_BLOCK     opens an .irp over one offset per 32-bit
 *                              word of the block (0, 4, 8, ...).
 *   BEGIN_UNROLL_BIG_BLOCK_X4  opens an .irp over one offset per 16 bytes
 *                              of the block (0, 16, ...), for the aligned
 *                              ldmia/stmia loop that moves four registers
 *                              at a time.
 *
 * Each macro only opens the repeat block; it is presumably closed by
 * END_UNROLL, which is defined outside this excerpt.
 */
#if __OPT_BIG_BLOCK_SIZE == 16
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0
|
||||
#elif __OPT_BIG_BLOCK_SIZE == 32
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12,16,20,24,28
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0,16
|
||||
#elif __OPT_BIG_BLOCK_SIZE == 64
|
||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
||||
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||
.irp offset, 0,16,32,48
|
||||
#else
|
||||
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
||||
#endif
|
||||
@ -134,6 +140,21 @@ memcpy:
|
||||
ands r3, r3, #3
|
||||
bne .Lmisaligned_copy
|
||||
|
||||
.Lbig_aligned:
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
blo .Lmid_block
|
||||
|
||||
stmfd sp!, {r4-r7}
|
||||
.Lbig_aligned_loop:
|
||||
BEGIN_UNROLL_BIG_BLOCK_X4
|
||||
ldmia r1!, {r4, r5, r6, r7}
|
||||
stmia r0!, {r4, r5, r6, r7}
|
||||
END_UNROLL
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
bhs .Lbig_aligned_loop
|
||||
ldmfd sp!, {r4-r7}
|
||||
b .Lmid_block
|
||||
|
||||
.Lbig_block:
|
||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||
blo .Lmid_block
|
||||
|
Loading…
Reference in New Issue
Block a user