Add a version of memset() optimized for speed
git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5242 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
parent
dca2ef8722
commit
073a96bb2c
@ -3490,4 +3490,7 @@
|
||||
the ARMv7-M family contributed by Mike Smith.
|
||||
* lib/strings/lib_vikmemcpy.c: As an option, the larger but faster
|
||||
implementation of memcpy from Daniel Vik is now available (this is
|
||||
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
|
||||
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
|
||||
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
|
||||
version of memset() optimized for speed. By default, memset() is
|
||||
optimized for size.
|
||||
|
@ -8,7 +8,7 @@
|
||||
<tr align="center" bgcolor="#e4e4e4">
|
||||
<td>
|
||||
<h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
|
||||
<p>Last Updated: August 28, 2012</p>
|
||||
<p>Last Updated: October 20, 2012</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
@ -12,7 +12,7 @@
|
||||
<h1><big><font color="#3c34ec">
|
||||
<i>NuttX RTOS Porting Guide</i>
|
||||
</font></big></h1>
|
||||
<p>Last Updated: August 28, 2012</p>
|
||||
<p>Last Updated: October 20, 2012</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
@ -4449,12 +4449,12 @@ build
|
||||
If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel
|
||||
Vik's optimized implementation of <code>memcpy()</code>:
|
||||
</p>
|
||||
<ul><li>
|
||||
<code>CONFIG_MEMCPY_VIK</code>:
|
||||
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
|
||||
See licensing information in the top-level <code>COPYING</code> file.
|
||||
Default: <code>n</code>.
|
||||
</li></ul>
|
||||
<ul><li>
|
||||
<code>CONFIG_MEMCPY_VIK</code>:
|
||||
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
|
||||
See licensing information in the top-level <code>COPYING</code> file.
|
||||
Default: <code>n</code>.
|
||||
</li></ul>
|
||||
|
||||
<p>
|
||||
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
|
||||
@ -4474,6 +4474,15 @@ build
|
||||
Compiles memcpy for 64 bit architectures
|
||||
</li></ul>
|
||||
|
||||
<p><li>
|
||||
If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available:
|
||||
</p>
|
||||
<ul><li>
|
||||
<code>CONFIG_MEMSET_OPTSPEED</code>:
|
||||
Select this option to use a version of <code>memset()</code> optimized for speed.
|
||||
Default: <code>memset()</code> is optimized for size.
|
||||
</li></ul>
|
||||
|
||||
<li>
|
||||
<p>
|
||||
The architecture may provide custom versions of certain standard header files:
|
||||
|
@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux
|
||||
|
||||
CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
|
||||
|
||||
If CONFIG_ARCH_MEMSET is not selected, then the following option is
|
||||
also available:
|
||||
|
||||
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
|
||||
optimized for speed. Default: memset() is optimized for size.
|
||||
|
||||
The architecture may provide custom versions of certain standard header
|
||||
files:
|
||||
|
||||
|
37
lib/Kconfig
37
lib/Kconfig
@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
|
||||
|
||||
if ARCH_OPTIMIZED_FUNCTIONS
|
||||
config ARCH_MEMCPY
|
||||
bool "memcpy"
|
||||
bool "memcpy()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of memcpy().
|
||||
|
||||
config MEMCPY_VIK
|
||||
bool "Vik memcpy"
|
||||
bool "Vik memcpy()"
|
||||
default n
|
||||
depends on !ARCH_MEMCPY
|
||||
---help---
|
||||
Select this option to use the optimized memcpy() function by Daniel Vik.
|
||||
See licensing information in the top-level COPYING file.
|
||||
Select this option to use the optimized memcpy() function by Daniel Vik.
|
||||
Select this option to optimize for speed at the expense of increased size.
|
||||
See licensing information in the top-level COPYING file.
|
||||
|
||||
if MEMCPY_VIK
|
||||
config MEMCPY_PRE_INC_PTRS
|
||||
@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
|
||||
MEMCPY_PRE_INC_PTRS option.
|
||||
|
||||
config MEMCPY_64BIT
|
||||
bool "64-bit memcpy"
|
||||
bool "64-bit memcpy()"
|
||||
default n
|
||||
---help---
|
||||
Compiles memcpy for 64 bit architectures
|
||||
Compiles memcpy() for 64 bit architectures
|
||||
|
||||
endif
|
||||
|
||||
config ARCH_MEMCMP
|
||||
bool "memcmp"
|
||||
bool "memcmp()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of memcmp().
|
||||
|
||||
config ARCH_MEMMOVE
|
||||
bool "memmove"
|
||||
bool "memmove()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of memmove().
|
||||
|
||||
config ARCH_MEMSET
|
||||
bool "memset"
|
||||
bool "memset()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of memset().
|
||||
|
||||
config MEMSET_OPTSPEED
|
||||
bool "Optimize memset() for speed"
|
||||
default n
|
||||
depends on !ARCH_MEMSET
|
||||
---help---
|
||||
Select this option to use a version of memset() optimized for speed.
|
||||
Default: memset() is optimized for size.
|
||||
|
||||
config ARCH_STRCMP
|
||||
bool "strcmp"
|
||||
bool "strcmp()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of strcmp().
|
||||
|
||||
config ARCH_STRCPY
|
||||
bool "strcpy"
|
||||
bool "strcpy()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of strcpy().
|
||||
|
||||
config ARCH_STRNCPY
|
||||
bool "strncpy"
|
||||
bool "strncpy()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
@ -239,14 +248,14 @@ config ARCH_STRLEN
|
||||
of strlen().
|
||||
|
||||
config ARCH_STRNLEN
|
||||
bool "strlen"
|
||||
bool "strnlen()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
of strnlen().
|
||||
|
||||
config ARCH_BZERO
|
||||
bool "bzero"
|
||||
bool "bzero()"
|
||||
default n
|
||||
---help---
|
||||
Select this option if the architecture provides an optimized version
|
||||
|
@ -42,8 +42,12 @@
|
||||
************************************************************/
|
||||
|
||||
#include <nuttx/config.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/************************************************************
|
||||
* Global Functions
|
||||
@ -52,8 +56,88 @@
|
||||
#ifndef CONFIG_ARCH_MEMSET
|
||||
/************************************************************
 * Name: memset
 *
 * Description:
 *   Fill the first n bytes of the memory starting at s with
 *   the value c converted to an unsigned char.
 *
 *   If CONFIG_MEMSET_OPTSPEED is defined, the fill is done
 *   with 16- and 32-bit stores once the destination has been
 *   aligned; otherwise a simple byte loop is used (smaller
 *   code).
 *
 * Input Parameters:
 *   s - Start of the memory region to fill
 *   c - Fill value; only the low 8 bits are used
 *   n - Number of bytes to fill (may be zero)
 *
 * Returned Value:
 *   The original pointer s.
 *
 ************************************************************/

void *memset(void *s, int c, size_t n)
{
#ifdef CONFIG_MEMSET_OPTSPEED
  /* This version is optimized for speed (you could do better
   * still by exploiting processor caching or memory burst
   * knowledge).  64-bit support might improve performance as
   * well.
   */

  uintptr_t addr  = (uintptr_t)s;

  /* Reduce c to a single byte BEFORE replicating it.  If the
   * upper bits of c are not masked off first (for example when
   * c is negative, such as -128 == 0x...ff80), they leak into
   * the 16- and 32-bit patterns and the wide stores write the
   * wrong byte value.
   */

  uint8_t   val8  = (uint8_t)c;
  uint16_t  val16 = (uint16_t)(((uint16_t)val8 << 8) | (uint16_t)val8);
  uint32_t  val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;

  /* Make sure that there is something to be filled */

  if (n > 0)
    {
      /* Align to a 16-bit boundary */

      if ((addr & 1) != 0)
        {
          *(uint8_t*)addr = val8;
          addr += 1;
          n    -= 1;
        }

      /* Check if there are at least 16-bits left to be filled */

      if (n >= 2)
        {
          /* Align to a 32-bit boundary (we know that the destination
           * address is already aligned to at least a 16-bit boundary).
           */

          if ((addr & 3) != 0)
            {
              *(uint16_t*)addr = val16;
              addr += 2;
              n    -= 2;
            }

          /* Loop while there are at least 32-bits left to be filled */

          while (n >= 4)
            {
              *(uint32_t*)addr = val32;
              addr += 4;
              n    -= 4;
            }
        }

      /* We may get here under the following conditions:
       *
       *   n = 0, addr may or may not be aligned
       *   n = 1, addr may or may not be aligned
       *   n = 2, addr is aligned to a 32-bit boundary
       *   n = 3, addr is aligned to a 32-bit boundary
       */

      switch (n)
        {
          default:
          case 0:
            DEBUGASSERT(n == 0);
            break;

          case 2:
            *(uint16_t*)addr = val16;
            break;

          case 3:
            *(uint16_t*)addr = val16;
            addr += 2;

            /* Fall through to store the final byte */

          case 1:
            *(uint8_t*)addr = val8;
            break;
        }
    }
#else
  /* This version is optimized for size */

  unsigned char *p = (unsigned char*)s;

  while (n-- > 0)
    {
      *p++ = (unsigned char)c;
    }
#endif
  return s;
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user