arch_memcpy: Optimize arch memcpy for armv7-m and armv8-m
Use the ldm and stm instructions to optimize performance when both src and dst are 32-bit aligned.

Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
This commit is contained in:
parent
d563717827
commit
3625385541
@ -66,12 +66,18 @@
|
|||||||
#if __OPT_BIG_BLOCK_SIZE == 16
|
#if __OPT_BIG_BLOCK_SIZE == 16
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12
|
.irp offset, 0,4,8,12
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0
|
||||||
#elif __OPT_BIG_BLOCK_SIZE == 32
|
#elif __OPT_BIG_BLOCK_SIZE == 32
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12,16,20,24,28
|
.irp offset, 0,4,8,12,16,20,24,28
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0,16
|
||||||
#elif __OPT_BIG_BLOCK_SIZE == 64
|
#elif __OPT_BIG_BLOCK_SIZE == 64
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0,16,32,48
|
||||||
#else
|
#else
|
||||||
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
||||||
#endif
|
#endif
|
||||||
@ -113,6 +119,21 @@ memcpy:
|
|||||||
ands r3, r3, #3
|
ands r3, r3, #3
|
||||||
bne .Lmisaligned_copy
|
bne .Lmisaligned_copy
|
||||||
|
|
||||||
|
.Lbig_aligned:
|
||||||
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
|
blo .Lmid_block
|
||||||
|
|
||||||
|
stmfd sp!, {r4-r7}
|
||||||
|
.Lbig_aligned_loop:
|
||||||
|
BEGIN_UNROLL_BIG_BLOCK_X4
|
||||||
|
ldmia r1!, {r4, r5, r6, r7}
|
||||||
|
stmia r0!, {r4, r5, r6, r7}
|
||||||
|
END_UNROLL
|
||||||
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
|
bhs .Lbig_aligned_loop
|
||||||
|
ldmfd sp!, {r4-r7}
|
||||||
|
b .Lmid_block
|
||||||
|
|
||||||
.Lbig_block:
|
.Lbig_block:
|
||||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
blo .Lmid_block
|
blo .Lmid_block
|
||||||
|
@ -69,12 +69,18 @@
|
|||||||
#if __OPT_BIG_BLOCK_SIZE == 16
|
#if __OPT_BIG_BLOCK_SIZE == 16
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12
|
.irp offset, 0,4,8,12
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0
|
||||||
#elif __OPT_BIG_BLOCK_SIZE == 32
|
#elif __OPT_BIG_BLOCK_SIZE == 32
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12,16,20,24,28
|
.irp offset, 0,4,8,12,16,20,24,28
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0,16
|
||||||
#elif __OPT_BIG_BLOCK_SIZE == 64
|
#elif __OPT_BIG_BLOCK_SIZE == 64
|
||||||
#define BEGIN_UNROLL_BIG_BLOCK \
|
#define BEGIN_UNROLL_BIG_BLOCK \
|
||||||
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
|
||||||
|
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
|
||||||
|
.irp offset, 0,16,32,48
|
||||||
#else
|
#else
|
||||||
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
#error "Illegal __OPT_BIG_BLOCK_SIZE"
|
||||||
#endif
|
#endif
|
||||||
@ -134,6 +140,21 @@ memcpy:
|
|||||||
ands r3, r3, #3
|
ands r3, r3, #3
|
||||||
bne .Lmisaligned_copy
|
bne .Lmisaligned_copy
|
||||||
|
|
||||||
|
.Lbig_aligned:
|
||||||
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
|
blo .Lmid_block
|
||||||
|
|
||||||
|
stmfd sp!, {r4-r7}
|
||||||
|
.Lbig_aligned_loop:
|
||||||
|
BEGIN_UNROLL_BIG_BLOCK_X4
|
||||||
|
ldmia r1!, {r4, r5, r6, r7}
|
||||||
|
stmia r0!, {r4, r5, r6, r7}
|
||||||
|
END_UNROLL
|
||||||
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
|
bhs .Lbig_aligned_loop
|
||||||
|
ldmfd sp!, {r4-r7}
|
||||||
|
b .Lmid_block
|
||||||
|
|
||||||
.Lbig_block:
|
.Lbig_block:
|
||||||
subs r2, __OPT_BIG_BLOCK_SIZE
|
subs r2, __OPT_BIG_BLOCK_SIZE
|
||||||
blo .Lmid_block
|
blo .Lmid_block
|
||||||
|
Loading…
Reference in New Issue
Block a user