Optimized memset() can be configured to do 64-bit stores

git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5243 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
patacongo 2012-10-21 01:31:56 +00:00
parent 073a96bb2c
commit 9e9fe21501
6 changed files with 100 additions and 21 deletions

View File

@ -3494,3 +3494,5 @@
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a * lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
version of memset() optimized for speed. By default, memset() is version of memset() optimized for speed. By default, memset() is
optimized for size. optimized for size.
* lib/strings/lib_memset.c: CONFIG_MEMSET_64BIT will perform 64-bit
aligned memset() operations.

View File

@ -4457,7 +4457,7 @@ build
</li></ul> </li></ul>
<p> <p>
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available: And if <code>CONFIG_MEMCPY_VIK</code> is selected, the following tuning options are available:
</p> </p>
<ul><li> <ul><li>
<code>CONFIG_MEMCPY_PRE_INC_PTRS</code>: <code>CONFIG_MEMCPY_PRE_INC_PTRS</code>:
@ -4471,7 +4471,7 @@ build
</li> </li>
<li> <li>
<code>CONFIG_MEMCPY_64BIT</code>: <code>CONFIG_MEMCPY_64BIT</code>:
Compiles memcpy for 64 bit architectures Compiles <code>memcpy()</code> for 64 bit architectures
</li></ul> </li></ul>
<p><li> <p><li>
@ -4483,6 +4483,14 @@ build
Default: <code>memset()</code> is optimized for size. Default: <code>memset()</code> is optimized for size.
</li></ul> </li></ul>
<p>
And if <code>CONFIG_MEMSET_OPTSPEED</code> is selected, the following tuning option is available:
</p>
<ul><li>
<code>CONFIG_MEMSET_64BIT</code>:
Compiles <code>memset()</code> for 64 bit architectures
</li></ul>
<li> <li>
<p> <p>
The architecture may provide custom versions of certain standard header files: The architecture may provide custom versions of certain standard header files:

View File

@ -628,7 +628,7 @@ defconfig -- This is a configuration file similar to the Linux
function by Daniel Vik. See licensing information in the top-level function by Daniel Vik. See licensing information in the top-level
COPYING file. Default: n COPYING file. Default: n
And if CONFIG_MEMCPY_VIK, the following tuning options are available: And if CONFIG_MEMCPY_VIK is selected, the following tuning options are available:
CONFIG_MEMCPY_PRE_INC_PTRS - Use pre-increment of pointers. Default is CONFIG_MEMCPY_PRE_INC_PTRS - Use pre-increment of pointers. Default is
post increment of pointers. post increment of pointers.
@ -644,6 +644,11 @@ defconfig -- This is a configuration file similar to the Linux
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset() CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
optimized for speed. Default: memset() is optimized for size. optimized for speed. Default: memset() is optimized for size.
And if CONFIG_MEMSET_OPTSPEED is selected, the following tuning option is
available:
CONFIG_MEMSET_64BIT - Compiles memset() for 64 bit architectures
The architecture may provide custom versions of certain standard header The architecture may provide custom versions of certain standard header
files: files:

View File

@ -219,6 +219,13 @@ config MEMSET_OPTSPEED
Select this option to use a version of memset() optimized for speed. Select this option to use a version of memset() optimized for speed.
Default: memset() is optimized for size. Default: memset() is optimized for size.
config MEMSET_64BIT
bool "64-bit memset()"
default n
depends on MEMSET_OPTSPEED
---help---
Compiles memset() for 64 bit architectures
config ARCH_STRCMP config ARCH_STRCMP
bool "strcmp()" bool "strcmp()"
default n default n

View File

@ -56,17 +56,22 @@ void *memmove(void *dest, const void *src, size_t count)
if (dest <= src) if (dest <= src)
{ {
tmp = (char*) dest; tmp = (char*) dest;
s = (char*) src; s = (char*) src;
while (count--) while (count--)
*tmp++ = *s++; {
*tmp++ = *s++;
}
} }
else else
{ {
tmp = (char*) dest + count; tmp = (char*) dest + count;
s = (char*) src + count; s = (char*) src + count;
while (count--) while (count--)
*--tmp = *--s; {
*--tmp = *--s;
}
} }
return dest; return dest;
} }
#endif #endif

View File

@ -1,4 +1,5 @@
/************************************************************
/****************************************************************************
* lib/string/lib_memset.c * lib/string/lib_memset.c
* *
* Copyright (C) 2007, 2011 Gregory Nutt. All rights reserved. * Copyright (C) 2007, 2011 Gregory Nutt. All rights reserved.
@ -31,15 +32,12 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
************************************************************/ ****************************************************************************/
/************************************************************
* Compilation Switches
************************************************************/
/************************************************************ /****************************************************************************
* Included Files * Included Files
************************************************************/ ****************************************************************************/
#include <nuttx/config.h> #include <nuttx/config.h>
@ -49,9 +47,21 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
/************************************************************ /****************************************************************************
* Pre-processor Definitions
****************************************************************************/
/* Can't support CONFIG_MEMSET_64BIT if the platform does not have 64-bit
* integer types.
*/
#ifndef CONFIG_HAVE_LONG_LONG
# undef CONFIG_MEMSET_64BIT
#endif
/****************************************************************************
* Global Functions * Global Functions
************************************************************/ ****************************************************************************/
#ifndef CONFIG_ARCH_MEMSET #ifndef CONFIG_ARCH_MEMSET
void *memset(void *s, int c, size_t n) void *memset(void *s, int c, size_t n)
@ -59,13 +69,15 @@ void *memset(void *s, int c, size_t n)
#ifdef CONFIG_MEMSET_OPTSPEED #ifdef CONFIG_MEMSET_OPTSPEED
/* This version is optimized for speed (you could do better /* This version is optimized for speed (you could do better
* still by exploiting processor caching or memory burst * still by exploiting processor caching or memory burst
* knowledge. 64-bit support might improve performance as * knowledge.)
* well.
*/ */
uintptr_t addr = (uintptr_t)s; uintptr_t addr = (uintptr_t)s;
uint16_t val16 = ((uint16_t)c << 8) | (uint16_t)c; uint16_t val16 = ((uint16_t)c << 8) | (uint16_t)c;
uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16; uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;
#ifdef CONFIG_MEMSET_64BIT
uint64_t val64 = ((uint64_t)val32 << 32) | (uint64_t)val32;
#endif
/* Make sure that there is something to be cleared */ /* Make sure that there is something to be cleared */
@ -95,6 +107,7 @@ void *memset(void *s, int c, size_t n)
n -= 2; n -= 2;
} }
#ifndef CONFIG_MEMSET_64BIT
/* Loop while there are at least 32-bits left to be zeroed */ /* Loop while there are at least 32-bits left to be zeroed */
while (n >= 4) while (n >= 4)
@ -103,12 +116,51 @@ void *memset(void *s, int c, size_t n)
addr += 4; addr += 4;
n -= 4; n -= 4;
} }
#else
/* Align to a 32-bit boundary */
if (n >= 4)
{
/* Align to a 64-bit boundary (we know that the destination
* address is already aligned to at least a 32-bit boundary).
*/
if ((addr & 7) != 0)
{
*(uint32_t*)addr = val32;
addr += 4;
n -= 4;
}
/* Loop while there are at least 64-bits left to be zeroed */
while (n >= 8)
{
*(uint64_t*)addr = val64;
addr += 8;
n -= 8;
}
}
#endif
} }
#ifdef CONFIG_MEMSET_64BIT
/* We may get here with n in the range 0..7. If n >= 4, then we should
* have 64-bit alignment.
*/
if (n >= 4)
{
*(uint32_t*)addr = val32;
addr += 4;
n -= 4;
}
#endif
/* We may get here under the following conditions: /* We may get here under the following conditions:
* *
* n = 0, addr may or may not be aligned * n = 0, addr may or may not be aligned
* n = 1, addr may or may not be aligned * n = 1, addr is aligned to at least a 16-bit boundary
* n = 2, addr is aligned to a 32-bit boundary * n = 2, addr is aligned to a 32-bit boundary
* n = 3, addr is aligned to a 32-bit boundary * n = 3, addr is aligned to a 32-bit boundary
*/ */