arch_memcpy: Optimize arch memcpy for armv7-m and armv8-m

Use the ldm and stm instructions to improve performance when
both src and dst are 32-bit aligned.

Signed-off-by: zhangyuan21 <zhangyuan21@xiaomi.com>
This commit is contained in:
zhangyuan21 2023-06-19 20:48:24 +08:00 committed by Petro Karashchenko
parent d563717827
commit 3625385541
2 changed files with 42 additions and 0 deletions

View File

@ -66,12 +66,18 @@
/* Unroll helpers for the big-block copy loops, selected at preprocessing
 * time by __OPT_BIG_BLOCK_SIZE.  Only 16, 32 and 64 are accepted; any
 * other value stops the build with #error.
 *
 *   BEGIN_UNROLL_BIG_BLOCK     opens an .irp whose "offset" list has one
 *                              entry per 4 bytes of the block.
 *   BEGIN_UNROLL_BIG_BLOCK_X4  opens an .irp whose "offset" list has one
 *                              entry per 16 bytes of the block, i.e. a
 *                              quarter as many repetitions.
 *
 * Both macros only open the .irp; the code that uses them must close it.
 * NOTE(review): callers close it with END_UNROLL, whose definition is not
 * visible in this excerpt - presumably it expands to .endr; confirm.
 */
#if __OPT_BIG_BLOCK_SIZE == 16
/* 16-byte block: 4 one-word steps, or 1 four-word step. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0
#elif __OPT_BIG_BLOCK_SIZE == 32
/* 32-byte block: 8 one-word steps, or 2 four-word steps. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0,16
#elif __OPT_BIG_BLOCK_SIZE == 64
/* 64-byte block: 16 one-word steps, or 4 four-word steps. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0,16,32,48
#else
/* Unsupported block size: fail the build instead of assembling a loop
 * with no matching unroll list. */
#error "Illegal __OPT_BIG_BLOCK_SIZE"
#endif
@ -113,6 +119,21 @@ memcpy:
ands r3, r3, #3
bne .Lmisaligned_copy
/* Multi-register big-block copy.  Entered by fall-through when the
 * "bne .Lmisaligned_copy" above is not taken, i.e. (r3 & 3) == 0.
 * NOTE(review): r3 is computed outside this excerpt - presumably it
 * combines the src and dst addresses so that this path only runs when
 * both are word aligned; confirm.
 *
 * Register use in this block:
 *   r0     - store pointer, advanced by stmia writeback
 *   r1     - load pointer, advanced by ldmia writeback
 *   r2     - remaining byte count, decremented by one block per pass
 *   r4-r7  - copy scratch; saved before the loop and restored after it
 */
.Lbig_aligned:
/* r2 -= block size.  A borrow means less than one full block remains, so
 * skip the loop.  r2 then holds (remaining - __OPT_BIG_BLOCK_SIZE) on
 * arrival at .Lmid_block. */
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block
/* The loop overwrites r4-r7, so save them on the stack first.  This is
 * done only after the size check, so the short path above never touches
 * the stack. */
stmfd sp!, {r4-r7}
.Lbig_aligned_loop:
/* Each ldmia/stmia pair moves four words (16 bytes).  The _X4 unroll list
 * has one entry per 16 bytes of the block, so one pass copies exactly
 * __OPT_BIG_BLOCK_SIZE bytes.  The .irp "offset" symbol is not referenced
 * in the body: the "!" writeback advances r1 and r0 instead. */
BEGIN_UNROLL_BIG_BLOCK_X4
ldmia r1!, {r4, r5, r6, r7}
stmia r0!, {r4, r5, r6, r7}
END_UNROLL
/* Keep looping while the subtraction does not borrow, i.e. at least one
 * more full block remains. */
subs r2, __OPT_BIG_BLOCK_SIZE
bhs .Lbig_aligned_loop
/* Restore the scratch registers, then hand the remainder to .Lmid_block.
 * r2 is (remaining - __OPT_BIG_BLOCK_SIZE) here, the same bias as on the
 * early-exit path above. */
ldmfd sp!, {r4-r7}
b .Lmid_block
.Lbig_block:
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block

View File

@ -69,12 +69,18 @@
/* Unroll helpers for the big-block copy loops, selected at preprocessing
 * time by __OPT_BIG_BLOCK_SIZE.  Only 16, 32 and 64 are accepted; any
 * other value stops the build with #error.
 *
 *   BEGIN_UNROLL_BIG_BLOCK     opens an .irp whose "offset" list has one
 *                              entry per 4 bytes of the block.
 *   BEGIN_UNROLL_BIG_BLOCK_X4  opens an .irp whose "offset" list has one
 *                              entry per 16 bytes of the block, i.e. a
 *                              quarter as many repetitions.
 *
 * Both macros only open the .irp; the code that uses them must close it.
 * NOTE(review): callers close it with END_UNROLL, whose definition is not
 * visible in this excerpt - presumably it expands to .endr; confirm.
 */
#if __OPT_BIG_BLOCK_SIZE == 16
/* 16-byte block: 4 one-word steps, or 1 four-word step. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0
#elif __OPT_BIG_BLOCK_SIZE == 32
/* 32-byte block: 8 one-word steps, or 2 four-word steps. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0,16
#elif __OPT_BIG_BLOCK_SIZE == 64
/* 64-byte block: 16 one-word steps, or 4 four-word steps. */
#define BEGIN_UNROLL_BIG_BLOCK \
.irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
#define BEGIN_UNROLL_BIG_BLOCK_X4 \
.irp offset, 0,16,32,48
#else
/* Unsupported block size: fail the build instead of assembling a loop
 * with no matching unroll list. */
#error "Illegal __OPT_BIG_BLOCK_SIZE"
#endif
@ -134,6 +140,21 @@ memcpy:
ands r3, r3, #3
bne .Lmisaligned_copy
/* Multi-register big-block copy.  Entered by fall-through when the
 * "bne .Lmisaligned_copy" above is not taken, i.e. (r3 & 3) == 0.
 * NOTE(review): r3 is computed outside this excerpt - presumably it
 * combines the src and dst addresses so that this path only runs when
 * both are word aligned; confirm.
 *
 * Register use in this block:
 *   r0     - store pointer, advanced by stmia writeback
 *   r1     - load pointer, advanced by ldmia writeback
 *   r2     - remaining byte count, decremented by one block per pass
 *   r4-r7  - copy scratch; saved before the loop and restored after it
 */
.Lbig_aligned:
/* r2 -= block size.  A borrow means less than one full block remains, so
 * skip the loop.  r2 then holds (remaining - __OPT_BIG_BLOCK_SIZE) on
 * arrival at .Lmid_block. */
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block
/* The loop overwrites r4-r7, so save them on the stack first.  This is
 * done only after the size check, so the short path above never touches
 * the stack. */
stmfd sp!, {r4-r7}
.Lbig_aligned_loop:
/* Each ldmia/stmia pair moves four words (16 bytes).  The _X4 unroll list
 * has one entry per 16 bytes of the block, so one pass copies exactly
 * __OPT_BIG_BLOCK_SIZE bytes.  The .irp "offset" symbol is not referenced
 * in the body: the "!" writeback advances r1 and r0 instead. */
BEGIN_UNROLL_BIG_BLOCK_X4
ldmia r1!, {r4, r5, r6, r7}
stmia r0!, {r4, r5, r6, r7}
END_UNROLL
/* Keep looping while the subtraction does not borrow, i.e. at least one
 * more full block remains. */
subs r2, __OPT_BIG_BLOCK_SIZE
bhs .Lbig_aligned_loop
/* Restore the scratch registers, then hand the remainder to .Lmid_block.
 * r2 is (remaining - __OPT_BIG_BLOCK_SIZE) here, the same bias as on the
 * early-exit path above. */
ldmfd sp!, {r4-r7}
b .Lmid_block
.Lbig_block:
subs r2, __OPT_BIG_BLOCK_SIZE
blo .Lmid_block