diff --git a/ChangeLog b/ChangeLog index 313d8865f7..82a4fd0926 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3490,4 +3490,7 @@ the ARMv7-M family contributed by Mike Smith. * lib/strings/lib_vikmemcpy.c: As an option, the larger but faster implemementation of memcpy from Daniel Vik is now available (this is - from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). \ No newline at end of file + from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). + * lib/string/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a + version of memset() optimized for speed. By default, memset() is + optimized for size. diff --git a/Documentation/NuttShell.html b/Documentation/NuttShell.html index ad204f5dc9..78a5651074 100644 --- a/Documentation/NuttShell.html +++ b/Documentation/NuttShell.html @@ -8,7 +8,7 @@

NuttShell (NSH)

-

Last Updated: August 28, 2012

+

Last Updated: October 20, 2012

diff --git a/Documentation/NuttxPortingGuide.html b/Documentation/NuttxPortingGuide.html index a16032db57..e43ca8a2fa 100644 --- a/Documentation/NuttxPortingGuide.html +++ b/Documentation/NuttxPortingGuide.html @@ -12,7 +12,7 @@

NuttX RTOS Porting Guide

-

Last Updated: August 28, 2012

+

Last Updated: October 20, 2012

@@ -4449,12 +4449,12 @@ build If CONFIG_ARCH_MEMCPY is not selected, then you make also select Daniel Vik's optimized implementation of memcpy():

- +

And if CONFIG_MEMCPY_VIK, the following tuning options are available: @@ -4474,6 +4474,15 @@ build Compiles memcpy for 64 bit architectures +

  • + If CONFIG_ARCH_MEMSET is not selected, then the following option is also available: +

    + +
  • The architecture may provide custom versions of certain standard header files: diff --git a/configs/README.txt b/configs/README.txt index cc65540d7f..0bb531d67a 100644 --- a/configs/README.txt +++ b/configs/README.txt @@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures + If CONFIG_ARCH_MEMSET is not selected, then the following option is + also available: + + CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset() + optimized for speed. Default: memset() is optimized for size. + The architecture may provide custom versions of certain standard header files: diff --git a/lib/Kconfig b/lib/Kconfig index b3f743db28..0f25c89238 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS if ARCH_OPTIMIZED_FUNCTIONS config ARCH_MEMCPY - bool "memcpy" + bool "memcpy()" default n ---help--- Select this option if the architecture provides an optimized version of memcpy(). config MEMCPY_VIK - bool "Vik memcpy" + bool "Vik memcpy()" default n depends on !ARCH_MEMCPY ---help--- - Select this option to use the optimized memcpy() function by Daniel Vik. - See licensing information in the top-level COPYING file. + Select this option to use the optimized memcpy() function by Daniel Vik. + Select this option to optimize for speed at the expense of increased size. + See licensing information in the top-level COPYING file. if MEMCPY_VIK config MEMCPY_PRE_INC_PTRS @@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY MEMCPY_PRE_INC_PTRS option. config MEMCPY_64BIT - bool "64-bit memcpy" + bool "64-bit memcpy()" default n ---help--- - Compiles memcpy for 64 bit architectures + Compiles memcpy() for 64 bit architectures endif config ARCH_MEMCMP - bool "memcmp" + bool "memcmp()" default n ---help--- Select this option if the architecture provides an optimized version of memcmp(). 
config ARCH_MEMMOVE - bool "memmove" + bool "memmove()" default n ---help--- Select this option if the architecture provides an optimized version of memmove(). config ARCH_MEMSET - bool "memset" + bool "memset()" default n ---help--- Select this option if the architecture provides an optimized version of memset(). +config MEMSET_OPTSPEED + bool "Optimize memset() for speed" + default n + depends on !ARCH_MEMSET + ---help--- + Select this option to use a version of memset() optimized for speed. + Default: memset() is optimized for size. + config ARCH_STRCMP - bool "strcmp" + bool "strcmp()" default n ---help--- Select this option if the architecture provides an optimized version of strcmp(). config ARCH_STRCPY - bool "strcpy" + bool "strcpy()" default n ---help--- Select this option if the architecture provides an optimized version of strcpy(). config ARCH_STRNCPY - bool "strncpy" + bool "strncpy()" default n ---help--- Select this option if the architecture provides an optimized version @@ -239,14 +248,14 @@ config ARCH_STRLEN of strlen(). config ARCH_STRNLEN - bool "strlen" + bool "strnlen()" default n ---help--- Select this option if the architecture provides an optimized version of strnlen(). config ARCH_BZERO - bool "bzero" + bool "bzero()" default n ---help--- Select this option if the architecture provides an optimized version diff --git a/lib/string/lib_memset.c b/lib/string/lib_memset.c index 916351b974..c910d2ce04 100644 --- a/lib/string/lib_memset.c +++ b/lib/string/lib_memset.c @@ -42,8 +42,12 @@ ************************************************************/ #include + #include + +#include #include +#include /************************************************************ * Global Functions @@ -52,8 +56,92 @@ #ifndef CONFIG_ARCH_MEMSET void *memset(void *s, int c, size_t n) { +#ifdef CONFIG_MEMSET_OPTSPEED + /* This version is optimized for speed (you could do better + * still by exploiting processor caching or memory burst + * knowledge). 
64-bit support might improve performance as + * well. + * + * Only the low 8 bits of c are replicated into val16 and val32, + * because memset() stores c converted to an unsigned char. + */ + + uintptr_t addr = (uintptr_t)s; + uint16_t val16 = ((uint16_t)(uint8_t)c << 8) | (uint16_t)(uint8_t)c; + uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16; + + /* Make sure that there is something to be set */ + + if (n > 0) + { + /* Align to a 16-bit boundary */ + + if ((addr & 1) != 0) + { + *(uint8_t*)addr = (uint8_t)c; + addr += 1; + n -= 1; + } + + /* Check if there are at least 16-bits left to be set */ + + if (n >= 2) + { + /* Align to a 32-bit boundary (we know that the destination + * address is already aligned to at least a 16-bit boundary). + */ + + if ((addr & 3) != 0) + { + *(uint16_t*)addr = val16; + addr += 2; + n -= 2; + } + + /* Loop while there are at least 32-bits left to be set */ + + while (n >= 4) + { + *(uint32_t*)addr = val32; + addr += 4; + n -= 4; + } + } + + /* We may get here under the following conditions: + * + * n = 0, addr may or may not be aligned + * n = 1, addr may or may not be aligned + * n = 2, addr is aligned to a 32-bit boundary + * n = 3, addr is aligned to a 32-bit boundary + */ + + switch (n) + { + default: + case 0: + DEBUGASSERT(n == 0); + break; + + case 2: + *(uint16_t*)addr = val16; + break; + + case 3: + *(uint16_t*)addr = val16; + addr += 2; + /* Fall through to store the one remaining byte */ + case 1: + *(uint8_t*)addr = (uint8_t)c; + break; + } + } +#else + /* This version is optimized for size */ + unsigned char *p = (unsigned char*)s; while (n-- > 0) *p++ = c; +#endif return s; } #endif