Add a version of memset() optimized for speed

git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5242 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
patacongo 2012-10-21 00:41:44 +00:00
parent dca2ef8722
commit 073a96bb2c
6 changed files with 134 additions and 23 deletions

View File

@ -3490,4 +3490,7 @@
the ARMv7-M family contributed by Mike Smith. the ARMv7-M family contributed by Mike Smith.
* lib/strings/lib_vikmemcpy.c: As an option, the larger but faster * lib/strings/lib_vikmemcpy.c: As an option, the larger but faster
implementation of memcpy from Daniel Vik is now available (this is implementation of memcpy from Daniel Vik is now available (this is
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html). from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
version of memset() optimized for speed. By default, memset() is
optimized for size.

View File

@ -8,7 +8,7 @@
<tr align="center" bgcolor="#e4e4e4"> <tr align="center" bgcolor="#e4e4e4">
<td> <td>
<h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1> <h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
<p>Last Updated: August 28, 2012</p> <p>Last Updated: October 20, 2012</p>
</td> </td>
</tr> </tr>
</table> </table>

View File

@ -12,7 +12,7 @@
<h1><big><font color="#3c34ec"> <h1><big><font color="#3c34ec">
<i>NuttX RTOS Porting Guide</i> <i>NuttX RTOS Porting Guide</i>
</font></big></h1> </font></big></h1>
<p>Last Updated: August 28, 2012</p> <p>Last Updated: October 20, 2012</p>
</td> </td>
</tr> </tr>
</table> </table>
@ -4449,12 +4449,12 @@ build
If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel
Vik's optimized implementation of <code>memcpy()</code>: Vik's optimized implementation of <code>memcpy()</code>:
</p> </p>
<ul><li> <ul><li>
<code>CONFIG_MEMCPY_VIK</code>: <code>CONFIG_MEMCPY_VIK</code>:
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik. Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
See licensing information in the top-level <code>COPYING</code> file. See licensing information in the top-level <code>COPYING</code> file.
Default: <code>n</code>. Default: <code>n</code>.
</li></ul> </li></ul>
<p> <p>
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available: And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
@ -4474,6 +4474,15 @@ build
Compiles memcpy for 64 bit architectures Compiles memcpy for 64 bit architectures
</li></ul> </li></ul>
<li><p>
If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available:
</p>
<ul><li>
<code>CONFIG_MEMSET_OPTSPEED</code>:
Select this option to use a version of <code>memset()</code> optimized for speed.
Default: <code>memset()</code> is optimized for size.
</li></ul>
<li> <li>
<p> <p>
The architecture may provide custom versions of certain standard header files: The architecture may provide custom versions of certain standard header files:

View File

@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux
CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
If CONFIG_ARCH_MEMSET is not selected, then the following option is
also available:
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
optimized for speed. Default: memset() is optimized for size.
The architecture may provide custom versions of certain standard header The architecture may provide custom versions of certain standard header
files: files:

View File

@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
if ARCH_OPTIMIZED_FUNCTIONS if ARCH_OPTIMIZED_FUNCTIONS
config ARCH_MEMCPY config ARCH_MEMCPY
bool "memcpy" bool "memcpy()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of memcpy(). of memcpy().
config MEMCPY_VIK config MEMCPY_VIK
bool "Vik memcpy" bool "Vik memcpy()"
default n default n
depends on !ARCH_MEMCPY depends on !ARCH_MEMCPY
---help--- ---help---
Select this option to use the optimized memcpy() function by Daniel Vik. Select this option to use the optimized memcpy() function by Daniel Vik.
See licensing information in the top-level COPYING file. Select this option to optimize for speed at the expense of increased size.
See licensing information in the top-level COPYING file.
if MEMCPY_VIK if MEMCPY_VIK
config MEMCPY_PRE_INC_PTRS config MEMCPY_PRE_INC_PTRS
@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
MEMCPY_PRE_INC_PTRS option. MEMCPY_PRE_INC_PTRS option.
config MEMCPY_64BIT config MEMCPY_64BIT
bool "64-bit memcpy" bool "64-bit memcpy()"
default n default n
---help--- ---help---
Compiles memcpy for 64 bit architectures Compiles memcpy() for 64 bit architectures
endif endif
config ARCH_MEMCMP config ARCH_MEMCMP
bool "memcmp" bool "memcmp()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of memcmp(). of memcmp().
config ARCH_MEMMOVE config ARCH_MEMMOVE
bool "memmove" bool "memmove()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of memmove(). of memmove().
config ARCH_MEMSET config ARCH_MEMSET
bool "memset" bool "memset()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of memset(). of memset().
config MEMSET_OPTSPEED
bool "Optimize memset() for speed"
default n
depends on !ARCH_MEMSET
---help---
Select this option to use a version of memset() optimized for speed.
Default: memset() is optimized for size.
config ARCH_STRCMP config ARCH_STRCMP
bool "strcmp" bool "strcmp()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of strcmp(). of strcmp().
config ARCH_STRCPY config ARCH_STRCPY
bool "strcpy" bool "strcpy()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of strcpy(). of strcpy().
config ARCH_STRNCPY config ARCH_STRNCPY
bool "strncpy" bool "strncpy()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
@ -239,14 +248,14 @@ config ARCH_STRLEN
of strlen(). of strlen().
config ARCH_STRNLEN config ARCH_STRNLEN
bool "strlen" bool "strnlen()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version
of strnlen(). of strnlen().
config ARCH_BZERO config ARCH_BZERO
bool "bzero" bool "bzero()"
default n default n
---help--- ---help---
Select this option if the architecture provides an optimized version Select this option if the architecture provides an optimized version

View File

@ -42,8 +42,12 @@
************************************************************/ ************************************************************/
#include <nuttx/config.h> #include <nuttx/config.h>
#include <sys/types.h> #include <sys/types.h>
#include <stdint.h>
#include <string.h> #include <string.h>
#include <assert.h>
/************************************************************ /************************************************************
* Global Functions * Global Functions
@ -52,8 +56,88 @@
#ifndef CONFIG_ARCH_MEMSET #ifndef CONFIG_ARCH_MEMSET
/************************************************************
 * Name: memset
 *
 * Description:
 *   Fill the first n bytes of the memory area at s with the
 *   byte value obtained by converting c to an 8-bit unsigned
 *   value.
 *
 * Input Parameters:
 *   s - Start of the memory area to fill
 *   c - Fill value; only the low-order 8 bits are used
 *   n - Number of bytes to fill
 *
 * Returned Value:
 *   The original value of s.
 *
 ************************************************************/

void *memset(void *s, int c, size_t n)
{
#ifdef CONFIG_MEMSET_OPTSPEED
  /* This version is optimized for speed (you could do better
   * still by exploiting processor caching or memory burst
   * knowledge).  64-bit support might improve performance as
   * well.
   */

  uintptr_t addr  = (uintptr_t)s;

  /* Truncate c to a single byte BEFORE replicating it.  c is an
   * int and may be negative (e.g., a sign-extended char) or
   * larger than 255; replicating the un-truncated value would
   * leak its upper bits into the 16- and 32-bit fill patterns.
   */

  uint8_t   val8  = (uint8_t)c;
  uint16_t  val16 = (uint16_t)(((uint16_t)val8 << 8) | (uint16_t)val8);
  uint32_t  val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;

  /* Make sure that there is something to be set */

  if (n > 0)
    {
      /* Align to a 16-bit boundary */

      if ((addr & 1) != 0)
        {
          *(uint8_t *)addr = val8;
          addr += 1;
          n    -= 1;
        }

      /* Check if there are at least 16-bits left to be set */

      if (n >= 2)
        {
          /* Align to a 32-bit boundary (we know that the destination
           * address is already aligned to at least a 16-bit boundary).
           */

          if ((addr & 3) != 0)
            {
              *(uint16_t *)addr = val16;
              addr += 2;
              n    -= 2;
            }

          /* Loop while there are at least 32-bits left to be set */

          while (n >= 4)
            {
              *(uint32_t *)addr = val32;
              addr += 4;
              n    -= 4;
            }
        }

      /* We may get here under the following conditions:
       *
       *   n = 0, addr may or may not be aligned
       *   n = 1, addr may or may not be aligned
       *   n = 2, addr is aligned to a 32-bit boundary
       *   n = 3, addr is aligned to a 32-bit boundary
       */

      switch (n)
        {
          default:
          case 0:
            /* Nothing left; any other value here is a logic error */

            DEBUGASSERT(n == 0);
            break;

          case 2:
            *(uint16_t *)addr = val16;
            break;

          case 3:
            *(uint16_t *)addr = val16;
            addr += 2;

            /* Fall through to write the final, odd byte */

          case 1:
            *(uint8_t *)addr = val8;
            break;
        }
    }
#else
  /* This version is optimized for size */

  unsigned char *p = (unsigned char *)s;
  while (n-- > 0)
    {
      *p++ = (unsigned char)c;
    }
#endif

  return s;
}
#endif #endif