Add a version of memset() optimized for speed
git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5242 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
parent
dca2ef8722
commit
073a96bb2c
@ -3490,4 +3490,7 @@
|
|||||||
the ARMv7-M family contributed by Mike Smith.
|
the ARMv7-M family contributed by Mike Smith.
|
||||||
* lib/strings/lib_vikmemcpy.c: As an option, the larger but faster
|
* lib/strings/lib_vikmemcpy.c: As an option, the larger but faster
|
||||||
implementation of memcpy from Daniel Vik is now available (this is
|
implementation of memcpy from Daniel Vik is now available (this is
|
||||||
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
|
from http://www.danielvik.com/2010/02/fast-memcpy-in-c.html).
|
||||||
|
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
|
||||||
|
version of memset() optimized for speed. By default, memset() is
|
||||||
|
optimized for size.
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
<tr align="center" bgcolor="#e4e4e4">
|
<tr align="center" bgcolor="#e4e4e4">
|
||||||
<td>
|
<td>
|
||||||
<h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
|
<h1><big><font color="#3c34ec"><i>NuttShell (NSH)</i></font></big></h1>
|
||||||
<p>Last Updated: August 28, 2012</p>
|
<p>Last Updated: October 20, 2012</p>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
<h1><big><font color="#3c34ec">
|
<h1><big><font color="#3c34ec">
|
||||||
<i>NuttX RTOS Porting Guide</i>
|
<i>NuttX RTOS Porting Guide</i>
|
||||||
</font></big></h1>
|
</font></big></h1>
|
||||||
<p>Last Updated: August 28, 2012</p>
|
<p>Last Updated: October 20, 2012</p>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
@ -4449,12 +4449,12 @@ build
|
|||||||
If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel
|
If <code>CONFIG_ARCH_MEMCPY</code> is <b>not</b> selected, then you may also select Daniel
|
||||||
Vik's optimized implementation of <code>memcpy()</code>:
|
Vik's optimized implementation of <code>memcpy()</code>:
|
||||||
</p>
|
</p>
|
||||||
<ul><li>
|
<ul><li>
|
||||||
<code>CONFIG_MEMCPY_VIK</code>:
|
<code>CONFIG_MEMCPY_VIK</code>:
|
||||||
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
|
Select this option to use the optimized <code>memcpy()</code> function by Daniel Vik.
|
||||||
See licensing information in the top-level <code>COPYING</code> file.
|
See licensing information in the top-level <code>COPYING</code> file.
|
||||||
Default: <code>n</code>.
|
Default: <code>n</code>.
|
||||||
</li></ul>
|
</li></ul>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
|
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
|
||||||
@ -4474,6 +4474,15 @@ build
|
|||||||
Compiles memcpy for 64 bit architectures
|
Compiles memcpy for 64 bit architectures
|
||||||
</li></ul>
|
</li></ul>
|
||||||
|
|
||||||
|
<li><p>
|
||||||
|
If <code>CONFIG_ARCH_MEMSET</code> is <b>not</b> selected, then the following option is also available:
|
||||||
|
</p>
|
||||||
|
<ul><li>
|
||||||
|
<code>CONFIG_MEMSET_OPTSPEED</code>:
|
||||||
|
Select this option to use a version of <code>memset()</code> optimized for speed.
|
||||||
|
Default: <code>memset()</code> is optimized for size.
|
||||||
|
</li></ul>
|
||||||
|
|
||||||
<li>
|
<li>
|
||||||
<p>
|
<p>
|
||||||
The architecture may provide custom versions of certain standard header files:
|
The architecture may provide custom versions of certain standard header files:
|
||||||
|
@ -638,6 +638,12 @@ defconfig -- This is a configuration file similar to the Linux
|
|||||||
|
|
||||||
CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
|
CONFIG_MEMCPY_64BIT - Compiles memcpy for 64 bit architectures
|
||||||
|
|
||||||
|
If CONFIG_ARCH_MEMSET is not selected, then the following option is
|
||||||
|
also available:
|
||||||
|
|
||||||
|
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
|
||||||
|
optimized for speed. Default: memset() is optimized for size.
|
||||||
|
|
||||||
The architecture may provide custom versions of certain standard header
|
The architecture may provide custom versions of certain standard header
|
||||||
files:
|
files:
|
||||||
|
|
||||||
|
37
lib/Kconfig
37
lib/Kconfig
@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
|
|||||||
|
|
||||||
if ARCH_OPTIMIZED_FUNCTIONS
|
if ARCH_OPTIMIZED_FUNCTIONS
|
||||||
config ARCH_MEMCPY
|
config ARCH_MEMCPY
|
||||||
bool "memcpy"
|
bool "memcpy()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of memcpy().
|
of memcpy().
|
||||||
|
|
||||||
config MEMCPY_VIK
|
config MEMCPY_VIK
|
||||||
bool "Vik memcpy"
|
bool "Vik memcpy()"
|
||||||
default n
|
default n
|
||||||
depends on !ARCH_MEMCPY
|
depends on !ARCH_MEMCPY
|
||||||
---help---
|
---help---
|
||||||
Select this option to use the optimized memcpy() function by Daniel Vik.
|
Select this option to use the optimized memcpy() function by Daniel Vik.
|
||||||
See licensing information in the top-level COPYING file.
|
Select this option to optimize for speed at the expense of increased size.
|
||||||
|
See licensing information in the top-level COPYING file.
|
||||||
|
|
||||||
if MEMCPY_VIK
|
if MEMCPY_VIK
|
||||||
config MEMCPY_PRE_INC_PTRS
|
config MEMCPY_PRE_INC_PTRS
|
||||||
@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
|
|||||||
MEMCPY_PRE_INC_PTRS option.
|
MEMCPY_PRE_INC_PTRS option.
|
||||||
|
|
||||||
config MEMCPY_64BIT
|
config MEMCPY_64BIT
|
||||||
bool "64-bit memcpy"
|
bool "64-bit memcpy()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Compiles memcpy for 64 bit architectures
|
Compiles memcpy() for 64 bit architectures
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
config ARCH_MEMCMP
|
config ARCH_MEMCMP
|
||||||
bool "memcmp"
|
bool "memcmp()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of memcmp().
|
of memcmp().
|
||||||
|
|
||||||
config ARCH_MEMMOVE
|
config ARCH_MEMMOVE
|
||||||
bool "memmove"
|
bool "memmove()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of memmove().
|
of memmove().
|
||||||
|
|
||||||
config ARCH_MEMSET
|
config ARCH_MEMSET
|
||||||
bool "memset"
|
bool "memset()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of memset().
|
of memset().
|
||||||
|
|
||||||
|
config MEMSET_OPTSPEED
|
||||||
|
bool "Optimize memset() for speed"
|
||||||
|
default n
|
||||||
|
depends on !ARCH_MEMSET
|
||||||
|
---help---
|
||||||
|
Select this option to use a version of memset() optimized for speed.
|
||||||
|
Default: memset() is optimized for size.
|
||||||
|
|
||||||
config ARCH_STRCMP
|
config ARCH_STRCMP
|
||||||
bool "strcmp"
|
bool "strcmp()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of strcmp().
|
of strcmp().
|
||||||
|
|
||||||
config ARCH_STRCPY
|
config ARCH_STRCPY
|
||||||
bool "strcpy"
|
bool "strcpy()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of strcpy().
|
of strcpy().
|
||||||
|
|
||||||
config ARCH_STRNCPY
|
config ARCH_STRNCPY
|
||||||
bool "strncpy"
|
bool "strncpy()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
@ -239,14 +248,14 @@ config ARCH_STRLEN
|
|||||||
of strlen().
|
of strlen().
|
||||||
|
|
||||||
config ARCH_STRNLEN
|
config ARCH_STRNLEN
|
||||||
bool "strlen"
|
bool "strnlen()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
of strnlen().
|
of strnlen().
|
||||||
|
|
||||||
config ARCH_BZERO
|
config ARCH_BZERO
|
||||||
bool "bzero"
|
bool "bzero()"
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Select this option if the architecture provides an optimized version
|
Select this option if the architecture provides an optimized version
|
||||||
|
@ -42,8 +42,12 @@
|
|||||||
************************************************************/
|
************************************************************/
|
||||||
|
|
||||||
#include <nuttx/config.h>
|
#include <nuttx/config.h>
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
/************************************************************
|
/************************************************************
|
||||||
* Global Functions
|
* Global Functions
|
||||||
@ -52,8 +56,88 @@
|
|||||||
#ifndef CONFIG_ARCH_MEMSET
|
#ifndef CONFIG_ARCH_MEMSET
|
||||||
void *memset(void *s, int c, size_t n)
|
void *memset(void *s, int c, size_t n)
|
||||||
{
|
{
|
||||||
|
#ifdef CONFIG_MEMSET_OPTSPEED
|
||||||
|
/* This version is optimized for speed (you could do better
|
||||||
|
* still by exploiting processor caching or memory burst
|
||||||
|
* knowledge).  64-bit support might improve performance as
|
||||||
|
* well.
|
||||||
|
*/
|
||||||
|
|
||||||
|
uintptr_t addr = (uintptr_t)s;
|
||||||
|
uint16_t val16 = ((uint16_t)c << 8) | (uint16_t)c;
|
||||||
|
uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;
|
||||||
|
|
||||||
|
/* Make sure that there is something to be set */
|
||||||
|
|
||||||
|
if (n > 0)
|
||||||
|
{
|
||||||
|
/* Align to a 16-bit boundary */
|
||||||
|
|
||||||
|
if ((addr & 1) != 0)
|
||||||
|
{
|
||||||
|
*(uint8_t*)addr = (uint8_t)c;
|
||||||
|
addr += 1;
|
||||||
|
n -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if there are at least 16-bits left to be set */
|
||||||
|
|
||||||
|
if (n >= 2)
|
||||||
|
{
|
||||||
|
/* Align to a 32-bit boundary (we know that the destination
|
||||||
|
* address is already aligned to at least a 16-bit boundary).
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ((addr & 3) != 0)
|
||||||
|
{
|
||||||
|
*(uint16_t*)addr = val16;
|
||||||
|
addr += 2;
|
||||||
|
n -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Loop while there are at least 32-bits left to be set */
|
||||||
|
|
||||||
|
while (n >= 4)
|
||||||
|
{
|
||||||
|
*(uint32_t*)addr = val32;
|
||||||
|
addr += 4;
|
||||||
|
n -= 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We may get here under the following conditions:
|
||||||
|
*
|
||||||
|
* n = 0, addr may or may not be aligned
|
||||||
|
* n = 1, addr may or may not be aligned
|
||||||
|
* n = 2, addr is aligned to a 32-bit boundary
|
||||||
|
* n = 3, addr is aligned to a 32-bit boundary
|
||||||
|
*/
|
||||||
|
|
||||||
|
switch (n)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
case 0:
|
||||||
|
DEBUGASSERT(n == 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
*(uint16_t*)addr = val16;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
*(uint16_t*)addr = val16;
|
||||||
|
addr += 2;
|
||||||
|
case 1:
|
||||||
|
*(uint8_t*)addr = (uint8_t)c;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* This version is optimized for size */
|
||||||
|
|
||||||
unsigned char *p = (unsigned char*)s;
|
unsigned char *p = (unsigned char*)s;
|
||||||
while (n-- > 0) *p++ = c;
|
while (n-- > 0) *p++ = c;
|
||||||
|
#endif
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user