Optimized memset() can be configured to do 64-bit stores

git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5243 42af7a65-404d-4744-a932-0658087f49c3
This commit is contained in:
patacongo 2012-10-21 01:31:56 +00:00
parent 073a96bb2c
commit 9e9fe21501
6 changed files with 100 additions and 21 deletions

View File

@ -3494,3 +3494,5 @@
* lib/strings/lib_memset.c: CONFIG_MEMSET_OPTSPEED will select a
version of memset() optimized for speed. By default, memset() is
optimized for size.
* lib/strings/lib_memset.c: CONFIG_MEMSET_64BIT will perform 64-bit
aligned memset() operations.

View File

@ -4457,7 +4457,7 @@ build
</li></ul>
<p>
And if <code>CONFIG_MEMCPY_VIK</code>, the following tuning options are available:
And if <code>CONFIG_MEMCPY_VIK</code> is selected, the following tuning options are available:
</p>
<ul><li>
<code>CONFIG_MEMCPY_PRE_INC_PTRS</code>:
@ -4471,7 +4471,7 @@ build
</li>
<li>
<code>CONFIG_MEMCPY_64BIT</code>:
Compiles memcpy for 64 bit architectures
Compiles <code>memcpy()</code> for 64 bit architectures
</li></ul>
<p><li>
@ -4483,6 +4483,14 @@ build
Default: <code>memset()</code> is optimized for size.
</li></ul>
<p>
And if <code>CONFIG_MEMSET_OPTSPEED</code> is selected, the following tuning option is available:
</p>
<ul><li>
<code>CONFIG_MEMSET_64BIT</code>:
Compiles <code>memset()</code> for 64 bit architectures
</li></ul>
<li>
<p>
The architecture may provide custom versions of certain standard header files:

View File

@ -628,7 +628,7 @@ defconfig -- This is a configuration file similar to the Linux
function by Daniel Vik. See licensing information in the top-level
COPYING file. Default: n
And if CONFIG_MEMCPY_VIK, the following tuning options are available:
And if CONFIG_MEMCPY_VIK is selected, the following tuning options are available:
CONFIG_MEMCPY_PRE_INC_PTRS - Use pre-increment of pointers. Default is
post increment of pointers.
@ -644,6 +644,11 @@ defconfig -- This is a configuration file similar to the Linux
CONFIG_MEMSET_OPTSPEED - Select this option to use a version of memset()
optimized for speed. Default: memset() is optimized for size.
And if CONFIG_MEMSET_OPTSPEED is selected, the following tuning option is
available:
CONFIG_MEMSET_64BIT - Compiles memset() for 64 bit architectures
The architecture may provide custom versions of certain standard header
files:

View File

@ -219,6 +219,13 @@ config MEMSET_OPTSPEED
Select this option to use a version of memset() optimized for speed.
Default: memset() is optimized for size.
config MEMSET_64BIT
bool "64-bit memset()"
default n
depends on MEMSET_OPTSPEED
---help---
Compiles memset() for 64 bit architectures
config ARCH_STRCMP
bool "strcmp()"
default n

View File

@ -58,15 +58,20 @@ void *memmove(void *dest, const void *src, size_t count)
tmp = (char*) dest;
s = (char*) src;
while (count--)
{
*tmp++ = *s++;
}
}
else
{
tmp = (char*) dest + count;
s = (char*) src + count;
while (count--)
{
*--tmp = *--s;
}
}
return dest;
}
#endif

View File

@ -1,4 +1,5 @@
/************************************************************
/****************************************************************************
* lib/string/lib_memset.c
*
* Copyright (C) 2007, 2011 Gregory Nutt. All rights reserved.
@ -31,15 +32,12 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
************************************************************/
****************************************************************************/
/************************************************************
* Compilation Switches
************************************************************/
/************************************************************
/****************************************************************************
* Included Files
************************************************************/
****************************************************************************/
#include <nuttx/config.h>
@ -49,9 +47,21 @@
#include <string.h>
#include <assert.h>
/************************************************************
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
/* Can't support CONFIG_MEMSET_64BIT if the platform does not have 64-bit
* integer types.
*/
#ifndef CONFIG_HAVE_LONG_LONG
# undef CONFIG_MEMSET_64BIT
#endif
/****************************************************************************
* Global Functions
************************************************************/
****************************************************************************/
#ifndef CONFIG_ARCH_MEMSET
void *memset(void *s, int c, size_t n)
@ -59,13 +69,15 @@ void *memset(void *s, int c, size_t n)
#ifdef CONFIG_MEMSET_OPTSPEED
/* This version is optimized for speed (you could do better
* still by exploiting processor caching or memory burst
* knowledge. 64-bit support might improve performance as
* well.
* knowledge.)
*/
uintptr_t addr = (uintptr_t)s;
uint16_t val16 = ((uint16_t)c << 8) | (uint16_t)c;
uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;
#ifdef CONFIG_MEMSET_64BIT
uint64_t val64 = ((uint64_t)val32 << 32) | (uint64_t)val32;
#endif
/* Make sure that there is something to be cleared */
@ -95,6 +107,7 @@ void *memset(void *s, int c, size_t n)
n -= 2;
}
#ifndef CONFIG_MEMSET_64BIT
/* Loop while there are at least 32-bits left to be zeroed */
while (n >= 4)
@ -103,12 +116,51 @@ void *memset(void *s, int c, size_t n)
addr += 4;
n -= 4;
}
#else
/* Check if there are at least 32-bits left to be set */
if (n >= 4)
{
/* Align to a 64-bit boundary (we know that the destination
* address is already aligned to at least a 32-bit boundary).
*/
if ((addr & 7) != 0)
{
*(uint32_t*)addr = val32;
addr += 4;
n -= 4;
}
/* Loop while there are at least 64-bits left to be zeroed */
while (n >= 8)
{
*(uint64_t*)addr = val64;
addr += 8;
n -= 8;
}
}
#endif
}
#ifdef CONFIG_MEMSET_64BIT
/* We may get here with n in the range 0..7. If n >= 4, then we should
* have 64-bit alignment.
*/
if (n >= 4)
{
*(uint32_t*)addr = val32;
addr += 4;
n -= 4;
}
#endif
/* We may get here under the following conditions:
*
* n = 0, addr may or may not be aligned
* n = 1, addr may or may not be aligned
* n = 1, addr is aligned to at least a 16-bit boundary
* n = 2, addr is aligned to a 32-bit boundary
* n = 3, addr is aligned to a 32-bit boundary
*/