system/cachespeed: Redesign of the overall test tool.

The new version of cachespeed takes the following problems of the previous version into account:
1. The user had to take care of obtaining a piece of available memory (and even then that memory could be stomped on during use)
2. The test code was highly repetitive and hard to read, and the tests were tightly coupled overall
3. Cache line alignment was not taken into account
4. When comparing values between different test tools, there was a small error because the values were obtained in different ways (different reading precision)
Therefore, the new version makes the following improvements to address the above problems:
1. Provide a "test_skeleton", which contains the overall testing process
2. The user only needs to run the tool and no longer has to care about how to obtain an available memory address; the memory allocated by the program also ensures memory safety and data accuracy
3. The data required for the test is now obtained from the system, which reduces the difficulty of use and the possibility of inaccurate results due to data errors.
4. Provide two kinds of precision data results, which can be configured through Kconfig
5. Optimize the output log, now more intuitive and concise, to help the subsequent data organization and observation
6. New test items for aligned/unaligned cache line
7. Better readability and extensibility, making it easier to add/remove test items

Signed-off-by: chenrun1 <chenrun1@xiaomi.com>
This commit is contained in:
chenrun1 2023-05-05 18:11:16 +08:00 committed by Xiang Xiao
parent 6f56e69a14
commit f3b1ac089b
3 changed files with 153 additions and 253 deletions

View File

@ -4,7 +4,8 @@
# #
config SYSTEM_CACHESPEED config SYSTEM_CACHESPEED
tristate "CACHE Speed Test" bool "CACHE Speed Test"
depends on (ARCH_ICACHE && ARCH_DCACHE) || (ARMV7M_ICACHE && ARMV7M_DCACHE) || (ARMV8M_ICACHE && ARMV8M_DCACHE)
default n default n
---help--- ---help---
Enable a simple CACHE speed test. Enable a simple CACHE speed test.
@ -26,5 +27,18 @@ config SYSTEM_CACHESPEED_STACKSIZE
int "CACHE speed test stack size" int "CACHE speed test stack size"
default DEFAULT_TASK_STACKSIZE default DEFAULT_TASK_STACKSIZE
config CACHESPEED_MONOTONIC_TIME
bool "Using Clock Gettime"
default n
---help---
When you turn on this option, the test loop count is reduced from 10000 to 1000 iterations.
Please consider the following scenarios:
1. The default (PERFTIME) has a relatively high precision, but this also
tends to cause failures in getting values for some smaller byte counts, so
reducing the precision with this option can make it easier to get data values.
2. If you want to compare data with the output of other tools (which use the
clock_gettime method), it is recommended to turn this option on, so as to
ensure that the data precision of both is consistent.
endif endif

View File

@ -22,6 +22,10 @@ include $(APPDIR)/Make.defs
#CACHE speed test #CACHE speed test
ifneq ($(CONFIG_CACHESPEED_MONOTONIC_TIME),y)
CFLAGS += -DCACHESPEED_PERFTIME
endif
PROGNAME = $(CONFIG_SYSTEM_CACHESPEED_PROGNAME) PROGNAME = $(CONFIG_SYSTEM_CACHESPEED_PROGNAME)
PRIORITY = $(CONFIG_SYSTEM_CACHESPEED_PRIORITY) PRIORITY = $(CONFIG_SYSTEM_CACHESPEED_PRIORITY)
STACKSIZE = $(CONFIG_SYSTEM_CACHESPEED_STACKSIZE) STACKSIZE = $(CONFIG_SYSTEM_CACHESPEED_STACKSIZE)

View File

@ -24,15 +24,14 @@
#include <nuttx/arch.h> #include <nuttx/arch.h>
#include <nuttx/cache.h> #include <nuttx/cache.h>
#include <nuttx/config.h>
#include <nuttx/irq.h> #include <nuttx/irq.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <inttypes.h> #include <inttypes.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/**************************************************************************** /****************************************************************************
* Pre-processor Definitions * Pre-processor Definitions
@ -40,28 +39,47 @@
#define CACHESPEED_PREFIX "CACHE Speed: " #define CACHESPEED_PREFIX "CACHE Speed: "
#define GET_VALUE(value, type) \ #ifdef CACHESPEED_PERFTIME
#define TIME uint64_t
#define REPEAT_NUM 10000
#define CONVERT(cost) \
do \ do \
{ \ { \
FAR char *ptr; \ struct timespec ts; \
value = (type)strtoul(optarg, &ptr, 0); \ up_perf_convert(cost, &ts); \
if (*ptr != '\0') \ cost = ts.tv_sec * 1000000000 + ts.tv_nsec; \
{ \
printf(CACHESPEED_PREFIX "Parameter error: -%c %s\n", option, optarg); \
show_usage(argv[0], EXIT_FAILURE); \
} \
} while (0) } while (0)
#define TIMESTAMP(x) (x) = up_perf_gettime()
#else
#define TIME time_t
#define REPEAT_NUM 1000
#define CONVERT(cost)
#define TIMESTAMP(x) \
do \
{ \
struct timespec ts; \
clock_gettime(CLOCK_MONOTONIC, &ts); \
x = ts.tv_sec * 1000000000 + ts.tv_nsec; \
} while (0)
#endif
#define GET_DCACHE_LINE up_get_dcache_linesize()
#define GET_ICACHE_LINE up_get_icache_linesize()
#define GET_DCACHE_SIZE up_get_dcache_size()
#define GET_ICACHE_SIZE up_get_icache_size()
/**************************************************************************** /****************************************************************************
* Private Types * Private Types
****************************************************************************/ ****************************************************************************/
struct cachespeed_s struct cachespeed_s
{ {
FAR void *begin; uintptr_t addr;
size_t memset_size; size_t alloc;
uint32_t repeat_num;
size_t opt_size;
}; };
/**************************************************************************** /****************************************************************************
@ -73,274 +91,143 @@ struct cachespeed_s
****************************************************************************/ ****************************************************************************/
/**************************************************************************** /****************************************************************************
* Name: show_usage * Name: setup
****************************************************************************/ ****************************************************************************/
static void show_usage(FAR const char *progname, int exitcode) static void setup(FAR struct cachespeed_s *cs)
{ {
printf("\nUsage: %s -b <address> -o <operation size>" struct mallinfo info = mallinfo();
" -s <memset size>[262144] -n <repeat number>[100] \n",
progname);
printf("\nWhere:\n");
printf(" -b <hex-address> begin memset address.\n");
printf(" -o <operation size> The size of the operation.\n");
printf(" -s <memset size> Execute memset size (in bytes)."
" [default value: 262144].\n");
printf(" -n <repeat num> number of repetitions"
" [default value: 1000].\n");
exit(exitcode);
}
/**************************************************************************** /* Get the currently available memory from the system. We want the
* Name: parse_commandline * memset range to be as large as possible in our tests to ensure
****************************************************************************/ * that the cache is filled with our dirty data
*/
static void parse_commandline(int argc, FAR char **argv, cs->alloc = info.fordblks / 2;
FAR struct cachespeed_s *info) cs->addr = (uintptr_t)malloc(cs->alloc);
{ if (cs->addr == 0)
int option;
memset(info, 0, sizeof(struct cachespeed_s));
info->repeat_num = 1000;
info->memset_size = 262144;
while ((option = getopt(argc, argv, "b:o:s:n:")) != ERROR)
{ {
switch (option) printf(CACHESPEED_PREFIX "Unable to request memory.\n");
{
case 'b':
GET_VALUE(info->begin, void *);
break;
case 'o':
GET_VALUE(info->opt_size, size_t);
break;
case 's':
GET_VALUE(info->memset_size, size_t);
break;
case 'n':
GET_VALUE(info->repeat_num, uint32_t);
if (info->repeat_num == 0)
{
printf(CACHESPEED_PREFIX "<repeat number> must > 0\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
break;
case '?':
printf(CACHESPEED_PREFIX "Unknown option: %c\n", optopt);
show_usage(argv[0], EXIT_FAILURE);
break;
}
}
if (info->opt_size == 0 || info->begin == 0) /* Let's export the test message */
printf(CACHESPEED_PREFIX "address src: %" PRIxPTR "\n", cs->addr);
}
/****************************************************************************
* Name: teardown
****************************************************************************/
static void teardown(FAR struct cachespeed_s *cs)
{
free((void *)cs->addr);
printf(CACHESPEED_PREFIX "Done!\n");
}
/****************************************************************************
* Name: report_line
****************************************************************************/
static void report_line(size_t bytes, TIME cost)
{
double rate;
/* There is a situation: if the time is 0, then the
* calculated speed is wrong.
*/
CONVERT(cost);
if (cost == 0)
{ {
printf(CACHESPEED_PREFIX "Missing required arguments\n"); printf(CACHESPEED_PREFIX "%d bytes cost time too small!\n", bytes);
show_usage(argv[0], EXIT_FAILURE); return;
}
}
/****************************************************************************
* Name: get_perf_time
****************************************************************************/
static uint32_t get_perf_time(void)
{
return up_perf_gettime();
}
/****************************************************************************
* Name: get_time_elaps
****************************************************************************/
static uint32_t get_time_elaps(uint32_t prev_time)
{
return get_perf_time() - prev_time;
}
/****************************************************************************
* Name: print_result
****************************************************************************/
static void print_result(FAR const char *name, size_t bytes,
uint32_t cost_time, uint32_t repeat_cnt)
{
uint32_t rate;
struct timespec ts;
/* Converted to ns */
up_perf_convert(cost_time, &ts);
cost_time = ts.tv_sec * 1000000000 + ts.tv_nsec;
if (cost_time / 1000000 == 0)
{
printf(CACHESPEED_PREFIX
"The total overhead time in millisecond precision"
"is too short.\n");
} }
/* rate = (bytes / 1024) / (cost_time / 1000000000) */ /* rate = Test Data Size / Execution Time */
rate = (uint64_t)bytes * 1000000000 / cost_time / 1024; rate = 1.00 * bytes * REPEAT_NUM / cost;
printf(CACHESPEED_PREFIX
"%s avg = %"PRIu32 " ns\t Rate: %" PRIu32 " KB/s\t" printf("%d Bytes: %4lf, %4llu, %4llu\n\r",
"[cost = %" PRIu32 " ms]\n", bytes, rate, cost / REPEAT_NUM, cost);
name, cost_time / repeat_cnt, rate, cost_time / 1000000);
} }
/**************************************************************************** /****************************************************************************
* Name: dcache_speed_test * Name: test_skeleton
****************************************************************************/ ****************************************************************************/
static void dcache_speed_test(FAR void *begin, size_t memset_size, static void test_skeleton(FAR struct cachespeed_s *cs,
size_t opt_size, uint32_t repeat_cnt) const size_t cache_size,
const size_t cache_line_size, int align,
void (*func)(uintptr_t, uintptr_t),
const char *name)
{ {
size_t total_size = memset_size * repeat_cnt; size_t update_size;
uint32_t invalidate_cost_time; printf("** %s [rate, avg, cost] in nanoseconds(bytes/nesc) %s **\n",
uint32_t clean_cost_time; name, align ? "align" : "unalign");
uint32_t flush_cost_time;
uint32_t cnt;
uint32_t pt;
irqstate_t flags;
/* Initialize a variable */ if (!align)
invalidate_cost_time = 0;
clean_cost_time = 0;
flush_cost_time = 0;
/* Accumulate the time to get the total time */
printf("______dcache performance______\n");
printf("______do all operation______\n");
flags = enter_critical_section();
for (cnt = 0; cnt < repeat_cnt; cnt++)
{ {
uint32_t start_time; update_size = cache_line_size - 1;
memset(begin, 0, memset_size); }
start_time = get_perf_time(); else
up_clean_dcache_all(); {
clean_cost_time += get_time_elaps(start_time); update_size = cache_line_size;
}
for (size_t bytes = cache_line_size;
bytes <= cache_size; bytes += update_size)
{
irqstate_t irq;
TIME start;
TIME end;
TIME cost = 0;
/* Make sure that test with all the contents
* of our address in the cache.
*/
memset(begin, 0, memset_size);
start_time = get_perf_time();
up_flush_dcache_all(); up_flush_dcache_all();
flush_cost_time += get_time_elaps(start_time);
irq = enter_critical_section();
for (int i = 0; i < REPEAT_NUM; i++)
{
memset((void *)cs->addr, 1, cs->alloc);
TIMESTAMP(start);
func(cs->addr, (uintptr_t)(cs->addr + bytes));
TIMESTAMP(end);
cost += end - start;
} }
leave_critical_section(flags); leave_critical_section(irq);
print_result("clean dcache():\t", total_size, clean_cost_time, repeat_cnt); report_line(bytes, cost);
print_result("flush dcache():\t", total_size, flush_cost_time, repeat_cnt);
for (pt = 32; pt <= opt_size; pt <<= 1)
{
total_size = pt * repeat_cnt;
invalidate_cost_time = 0;
clean_cost_time = 0;
flush_cost_time = 0;
if (pt < 1024)
{
printf("______do %" PRIu32 " B operation______\n", pt);
}
else
{
printf("______do %" PRIu32 " KB operation______\n", pt / 1024);
}
flags = enter_critical_section();
for (cnt = 0; cnt < repeat_cnt; cnt++)
{
uint32_t start_time;
memset(begin, 0, memset_size);
start_time = get_perf_time();
up_invalidate_dcache((uintptr_t)begin,
(uintptr_t)((uint8_t *)begin + pt));
invalidate_cost_time += get_time_elaps(start_time);
memset(begin, 0, memset_size);
start_time = get_perf_time();
up_clean_dcache((uintptr_t)begin,
(uintptr_t)((uint8_t *)begin + pt));
clean_cost_time += get_time_elaps(start_time);
memset(begin, 0, memset_size);
start_time = get_perf_time();
up_flush_dcache((uintptr_t)begin,
(uintptr_t)((uint8_t *)begin + pt));
flush_cost_time += get_time_elaps(start_time);
}
leave_critical_section(flags);
print_result("invalidate dcache():\t",
total_size, invalidate_cost_time, repeat_cnt);
print_result("clean dcache():\t", total_size, clean_cost_time,
repeat_cnt);
print_result("flush dcache():\t", total_size, flush_cost_time,
repeat_cnt);
} }
} }
/**************************************************************************** /****************************************************************************
* Name: icache_speed_test * Name: cachespeed_common
****************************************************************************/ ****************************************************************************/
static void icache_speed_test(FAR void *begin, size_t memset_size, static void cachespeed_common(struct cachespeed_s *cs)
size_t opt_size, uint32_t repeat_cnt)
{ {
irqstate_t flags; test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
int32_t cnt; up_invalidate_dcache, "dcache invalidate");
uint32_t pt; test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
uint32_t invalidate_cost_time = 0; up_invalidate_dcache, "dcache invalidate");
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
/* Accumulate the time to get the total time */ up_clean_dcache, "dcache clean");
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
printf("______icache performance______\n"); up_clean_dcache, "dcache clean");
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
printf("______do all operation______\n"); up_flush_dcache, "dcache flush");
flags = enter_critical_section(); test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
for (cnt = 0; cnt < repeat_cnt; cnt++) up_flush_dcache, "dcache flush");
{ test_skeleton(cs, GET_ICACHE_SIZE, GET_ICACHE_LINE, 1,
uint32_t start_time; up_invalidate_icache, "icache invalidate");
memset(begin, 0, memset_size); test_skeleton(cs, GET_ICACHE_SIZE, GET_ICACHE_LINE, 0,
start_time = get_perf_time(); up_invalidate_icache, "icache invalidate");
up_invalidate_icache_all();
invalidate_cost_time += get_time_elaps(start_time);
}
leave_critical_section(flags);
print_result("invalidate dcache():\t",
memset_size * repeat_cnt, invalidate_cost_time, repeat_cnt);
for (pt = 32; pt <= opt_size; pt <<= 1)
{
const size_t total_size = pt * repeat_cnt;
invalidate_cost_time = 0;
if (pt < 1024)
{
printf("______do %" PRIu32 " B operation______\n", pt);
}
else
{
printf("______do %" PRIu32 " KB operation______\n", pt / 1024);
}
flags = enter_critical_section();
for (cnt = 0; cnt < repeat_cnt; cnt++)
{
uint32_t start_time;
memset(begin, 0, memset_size);
start_time = get_perf_time();
up_invalidate_icache((uintptr_t)begin,
(uintptr_t)((uint8_t *)begin + pt));
invalidate_cost_time += get_time_elaps(start_time);
}
leave_critical_section(flags);
print_result("invalidate icache():\t",
total_size, invalidate_cost_time, repeat_cnt);
}
} }
/**************************************************************************** /****************************************************************************
@ -353,19 +240,14 @@ static void icache_speed_test(FAR void *begin, size_t memset_size,
int main(int argc, FAR char *argv[]) int main(int argc, FAR char *argv[])
{ {
struct cachespeed_s cachespeed; struct cachespeed_s cs =
{
/* Setup defaults and parse the command line */ .addr = 0,
.alloc = 0
parse_commandline(argc, argv, &cachespeed); };
/* Perform the dcache and icache test */
dcache_speed_test(cachespeed.begin, cachespeed.memset_size,
cachespeed.opt_size, cachespeed.repeat_num);
icache_speed_test(cachespeed.begin, cachespeed.memset_size,
cachespeed.opt_size, cachespeed.repeat_num);
setup(&cs);
cachespeed_common(&cs);
teardown(&cs);
return 0; return 0;
} }