system/cachespeed:Redesign of the overall test tool.
Compared to the previous version, the following factors have been taken into account in the new version of cachespeed: 1. the user needs to care about getting a piece of available memory (even if this piece of memory may memory stomp during use) 2. High repetition of test code, poor readability, and overall high test coupling 3. Not taking into account the alignment cache line situation 4. When comparing values between different test tools, there is a little error because of the different ways to get the values (different fine reading) Therefore, in the new version according to the above problems have been improved: 1. Provide a "test_skeleton", which contains the overall testing process 2. The user only needs to execute, not to care about how to get the available memory address, and the memory allocated by the program also ensures the memory security and data accuracy 3. The system to obtain the data required for the test reduces the difficulty of use and the possibility of inaccurate results due to data errors. 4. Provide two kinds of precision data results, which can be configured through Kconfig 5. Optimize the output log, now more intuitive and concise, to help the subsequent data organization and observation 6. New test items for aligned/unaligned cache line 7. Better readability and extensibility, making it easier to add/remove test items Signed-off-by: chenrun1 <chenrun1@xiaomi.com>
This commit is contained in:
parent
6f56e69a14
commit
f3b1ac089b
@ -4,7 +4,8 @@
|
||||
#
|
||||
|
||||
config SYSTEM_CACHESPEED
|
||||
tristate "CACHE Speed Test"
|
||||
bool "CACHE Speed Test"
|
||||
depends on (ARCH_ICACHE && ARCH_DCACHE) || (ARMV7M_ICACHE && ARMV7M_DCACHE) || (ARMV8M_ICACHE && ARMV8M_DCACHE)
|
||||
default n
|
||||
---help---
|
||||
Enable a simple CACHE speed test.
|
||||
@ -26,5 +27,18 @@ config SYSTEM_CACHESPEED_STACKSIZE
|
||||
int "CACHE speed test stack size"
|
||||
default DEFAULT_TASK_STACKSIZE
|
||||
|
||||
config CACHESPEED_MONOTONIC_TIME
|
||||
bool "Using Clock Gettime"
|
||||
default n
|
||||
---help---
|
||||
When you turn on this option, your test loop will go from 10000 -> 1000 times
|
||||
Please consider the following scenario:
|
||||
1. By default (PERFTIME) will have a relatively high precision, but this also
|
||||
tends to cause failure in getting values on some smaller bytes, so reducing
|
||||
the precision with this option can make it easier to get data values.
|
||||
2. whether to compare data with the content of other tools (using CLOCK
|
||||
GETTIME method), if so, it is recommended to turn on, so as to ensure that
|
||||
the data precision of both consistent.
|
||||
|
||||
endif
|
||||
|
||||
|
@ -22,10 +22,14 @@ include $(APPDIR)/Make.defs
|
||||
|
||||
#CACHE speed test
|
||||
|
||||
ifneq ($(CONFIG_CACHESPEED_MONOTONIC_TIME),y)
|
||||
CFLAGS += -DCACHESPEED_PERFTIME
|
||||
endif
|
||||
|
||||
PROGNAME = $(CONFIG_SYSTEM_CACHESPEED_PROGNAME)
|
||||
PRIORITY = $(CONFIG_SYSTEM_CACHESPEED_PRIORITY)
|
||||
STACKSIZE = $(CONFIG_SYSTEM_CACHESPEED_STACKSIZE)
|
||||
MODULE = $(CONFIG_SYSTEM_CACHESPEED)
|
||||
MODULE = $(CONFIG_SYSTEM_CACHESPEED)
|
||||
|
||||
MAINSRC = cachespeed_main.c
|
||||
|
||||
|
@ -24,15 +24,14 @@
|
||||
|
||||
#include <nuttx/arch.h>
|
||||
#include <nuttx/cache.h>
|
||||
#include <nuttx/config.h>
|
||||
#include <nuttx/irq.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <inttypes.h>
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
/****************************************************************************
|
||||
* Pre-processor Definitions
|
||||
@ -40,28 +39,47 @@
|
||||
|
||||
#define CACHESPEED_PREFIX "CACHE Speed: "
|
||||
|
||||
#define GET_VALUE(value, type) \
|
||||
#ifdef CACHESPEED_PERFTIME
|
||||
#define TIME uint64_t
|
||||
#define REPEAT_NUM 10000
|
||||
|
||||
#define CONVERT(cost) \
|
||||
do \
|
||||
{ \
|
||||
FAR char *ptr; \
|
||||
value = (type)strtoul(optarg, &ptr, 0); \
|
||||
if (*ptr != '\0') \
|
||||
{ \
|
||||
printf(CACHESPEED_PREFIX "Parameter error: -%c %s\n", option, optarg); \
|
||||
show_usage(argv[0], EXIT_FAILURE); \
|
||||
} \
|
||||
struct timespec ts; \
|
||||
up_perf_convert(cost, &ts); \
|
||||
cost = ts.tv_sec * 1000000000 + ts.tv_nsec; \
|
||||
} while (0)
|
||||
|
||||
#define TIMESTAMP(x) (x) = up_perf_gettime()
|
||||
#else
|
||||
#define TIME time_t
|
||||
#define REPEAT_NUM 1000
|
||||
|
||||
#define CONVERT(cost)
|
||||
|
||||
#define TIMESTAMP(x) \
|
||||
do \
|
||||
{ \
|
||||
struct timespec ts; \
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts); \
|
||||
x = ts.tv_sec * 1000000000 + ts.tv_nsec; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#define GET_DCACHE_LINE up_get_dcache_linesize()
|
||||
#define GET_ICACHE_LINE up_get_icache_linesize()
|
||||
#define GET_DCACHE_SIZE up_get_dcache_size()
|
||||
#define GET_ICACHE_SIZE up_get_icache_size()
|
||||
|
||||
/****************************************************************************
|
||||
* Private Types
|
||||
****************************************************************************/
|
||||
|
||||
struct cachespeed_s
|
||||
{
|
||||
FAR void *begin;
|
||||
size_t memset_size;
|
||||
uint32_t repeat_num;
|
||||
size_t opt_size;
|
||||
uintptr_t addr;
|
||||
size_t alloc;
|
||||
};
|
||||
|
||||
/****************************************************************************
|
||||
@ -73,274 +91,143 @@ struct cachespeed_s
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
|
||||
* Name: show_usage
|
||||
* Name: setup
|
||||
****************************************************************************/
|
||||
|
||||
static void show_usage(FAR const char *progname, int exitcode)
|
||||
static void setup(FAR struct cachespeed_s *cs)
|
||||
{
|
||||
printf("\nUsage: %s -b <address> -o <operation size>"
|
||||
" -s <memset size>[262144] -n <repeat number>[100] \n",
|
||||
progname);
|
||||
printf("\nWhere:\n");
|
||||
printf(" -b <hex-address> begin memset address.\n");
|
||||
printf(" -o <operation size> The size of the operation.\n");
|
||||
printf(" -s <memset size> Execute memset size (in bytes)."
|
||||
" [default value: 262144].\n");
|
||||
printf(" -n <repeat num> number of repetitions"
|
||||
" [default value: 1000].\n");
|
||||
exit(exitcode);
|
||||
}
|
||||
struct mallinfo info = mallinfo();
|
||||
|
||||
/****************************************************************************
|
||||
* Name: parse_commandline
|
||||
****************************************************************************/
|
||||
/* Get the currently available memory from the system. We want the
|
||||
* memset range to be as large as possible in our tests to ensure
|
||||
* that the cache is filled with our dirty data
|
||||
*/
|
||||
|
||||
static void parse_commandline(int argc, FAR char **argv,
|
||||
FAR struct cachespeed_s *info)
|
||||
{
|
||||
int option;
|
||||
|
||||
memset(info, 0, sizeof(struct cachespeed_s));
|
||||
info->repeat_num = 1000;
|
||||
info->memset_size = 262144;
|
||||
|
||||
while ((option = getopt(argc, argv, "b:o:s:n:")) != ERROR)
|
||||
cs->alloc = info.fordblks / 2;
|
||||
cs->addr = (uintptr_t)malloc(cs->alloc);
|
||||
if (cs->addr == 0)
|
||||
{
|
||||
switch (option)
|
||||
{
|
||||
case 'b':
|
||||
GET_VALUE(info->begin, void *);
|
||||
break;
|
||||
case 'o':
|
||||
GET_VALUE(info->opt_size, size_t);
|
||||
break;
|
||||
case 's':
|
||||
GET_VALUE(info->memset_size, size_t);
|
||||
break;
|
||||
case 'n':
|
||||
GET_VALUE(info->repeat_num, uint32_t);
|
||||
if (info->repeat_num == 0)
|
||||
{
|
||||
printf(CACHESPEED_PREFIX "<repeat number> must > 0\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
printf(CACHESPEED_PREFIX "Unknown option: %c\n", optopt);
|
||||
show_usage(argv[0], EXIT_FAILURE);
|
||||
break;
|
||||
}
|
||||
printf(CACHESPEED_PREFIX "Unable to request memory.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (info->opt_size == 0 || info->begin == 0)
|
||||
{
|
||||
printf(CACHESPEED_PREFIX "Missing required arguments\n");
|
||||
show_usage(argv[0], EXIT_FAILURE);
|
||||
}
|
||||
/* Let's export the test message */
|
||||
|
||||
printf(CACHESPEED_PREFIX "address src: %" PRIxPTR "\n", cs->addr);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Name: get_perf_time
|
||||
* Name: teardown
|
||||
****************************************************************************/
|
||||
|
||||
static uint32_t get_perf_time(void)
|
||||
static void teardown(FAR struct cachespeed_s *cs)
|
||||
{
|
||||
return up_perf_gettime();
|
||||
free((void *)cs->addr);
|
||||
printf(CACHESPEED_PREFIX "Done!\n");
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Name: get_time_elaps
|
||||
* Name: report_line
|
||||
****************************************************************************/
|
||||
|
||||
static uint32_t get_time_elaps(uint32_t prev_time)
|
||||
static void report_line(size_t bytes, TIME cost)
|
||||
{
|
||||
return get_perf_time() - prev_time;
|
||||
}
|
||||
double rate;
|
||||
|
||||
/****************************************************************************
|
||||
* Name: print_result
|
||||
****************************************************************************/
|
||||
/* There is a situation: if the time is 0, then the
|
||||
* calculated speed is wrong.
|
||||
*/
|
||||
|
||||
static void print_result(FAR const char *name, size_t bytes,
|
||||
uint32_t cost_time, uint32_t repeat_cnt)
|
||||
{
|
||||
uint32_t rate;
|
||||
struct timespec ts;
|
||||
CONVERT(cost);
|
||||
|
||||
/* Converted to ns */
|
||||
|
||||
up_perf_convert(cost_time, &ts);
|
||||
cost_time = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||
|
||||
if (cost_time / 1000000 == 0)
|
||||
if (cost == 0)
|
||||
{
|
||||
printf(CACHESPEED_PREFIX
|
||||
"The total overhead time in millisecond precision"
|
||||
"is too short.\n");
|
||||
printf(CACHESPEED_PREFIX "%d bytes cost time too small!\n", bytes);
|
||||
return;
|
||||
}
|
||||
|
||||
/* rate = (bytes / 1024) / (cost_time / 1000000000) */
|
||||
/* rate = Test Data Size / Execution Time */
|
||||
|
||||
rate = (uint64_t)bytes * 1000000000 / cost_time / 1024;
|
||||
printf(CACHESPEED_PREFIX
|
||||
"%s avg = %"PRIu32 " ns\t Rate: %" PRIu32 " KB/s\t"
|
||||
"[cost = %" PRIu32 " ms]\n",
|
||||
name, cost_time / repeat_cnt, rate, cost_time / 1000000);
|
||||
rate = 1.00 * bytes * REPEAT_NUM / cost;
|
||||
|
||||
printf("%d Bytes: %4lf, %4llu, %4llu\n\r",
|
||||
bytes, rate, cost / REPEAT_NUM, cost);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Name: dcache_speed_test
|
||||
* Name: test_skeleton
|
||||
****************************************************************************/
|
||||
|
||||
static void dcache_speed_test(FAR void *begin, size_t memset_size,
|
||||
size_t opt_size, uint32_t repeat_cnt)
|
||||
static void test_skeleton(FAR struct cachespeed_s *cs,
|
||||
const size_t cache_size,
|
||||
const size_t cache_line_size, int align,
|
||||
void (*func)(uintptr_t, uintptr_t),
|
||||
const char *name)
|
||||
{
|
||||
size_t total_size = memset_size * repeat_cnt;
|
||||
uint32_t invalidate_cost_time;
|
||||
uint32_t clean_cost_time;
|
||||
uint32_t flush_cost_time;
|
||||
uint32_t cnt;
|
||||
uint32_t pt;
|
||||
irqstate_t flags;
|
||||
size_t update_size;
|
||||
printf("** %s [rate, avg, cost] in nanoseconds(bytes/nesc) %s **\n",
|
||||
name, align ? "align" : "unalign");
|
||||
|
||||
/* Initialize a variable */
|
||||
|
||||
invalidate_cost_time = 0;
|
||||
clean_cost_time = 0;
|
||||
flush_cost_time = 0;
|
||||
|
||||
/* Accumulate the time to get the total time */
|
||||
|
||||
printf("______dcache performance______\n");
|
||||
|
||||
printf("______do all operation______\n");
|
||||
flags = enter_critical_section();
|
||||
for (cnt = 0; cnt < repeat_cnt; cnt++)
|
||||
if (!align)
|
||||
{
|
||||
uint32_t start_time;
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_clean_dcache_all();
|
||||
clean_cost_time += get_time_elaps(start_time);
|
||||
update_size = cache_line_size - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
update_size = cache_line_size;
|
||||
}
|
||||
|
||||
for (size_t bytes = cache_line_size;
|
||||
bytes <= cache_size; bytes += update_size)
|
||||
{
|
||||
irqstate_t irq;
|
||||
TIME start;
|
||||
TIME end;
|
||||
TIME cost = 0;
|
||||
|
||||
/* Make sure that test with all the contents
|
||||
* of our address in the cache.
|
||||
*/
|
||||
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_flush_dcache_all();
|
||||
flush_cost_time += get_time_elaps(start_time);
|
||||
}
|
||||
|
||||
leave_critical_section(flags);
|
||||
print_result("clean dcache():\t", total_size, clean_cost_time, repeat_cnt);
|
||||
print_result("flush dcache():\t", total_size, flush_cost_time, repeat_cnt);
|
||||
|
||||
for (pt = 32; pt <= opt_size; pt <<= 1)
|
||||
{
|
||||
total_size = pt * repeat_cnt;
|
||||
invalidate_cost_time = 0;
|
||||
clean_cost_time = 0;
|
||||
flush_cost_time = 0;
|
||||
|
||||
if (pt < 1024)
|
||||
irq = enter_critical_section();
|
||||
for (int i = 0; i < REPEAT_NUM; i++)
|
||||
{
|
||||
printf("______do %" PRIu32 " B operation______\n", pt);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("______do %" PRIu32 " KB operation______\n", pt / 1024);
|
||||
memset((void *)cs->addr, 1, cs->alloc);
|
||||
TIMESTAMP(start);
|
||||
func(cs->addr, (uintptr_t)(cs->addr + bytes));
|
||||
TIMESTAMP(end);
|
||||
cost += end - start;
|
||||
}
|
||||
|
||||
flags = enter_critical_section();
|
||||
for (cnt = 0; cnt < repeat_cnt; cnt++)
|
||||
{
|
||||
uint32_t start_time;
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_invalidate_dcache((uintptr_t)begin,
|
||||
(uintptr_t)((uint8_t *)begin + pt));
|
||||
invalidate_cost_time += get_time_elaps(start_time);
|
||||
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_clean_dcache((uintptr_t)begin,
|
||||
(uintptr_t)((uint8_t *)begin + pt));
|
||||
clean_cost_time += get_time_elaps(start_time);
|
||||
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_flush_dcache((uintptr_t)begin,
|
||||
(uintptr_t)((uint8_t *)begin + pt));
|
||||
flush_cost_time += get_time_elaps(start_time);
|
||||
}
|
||||
|
||||
leave_critical_section(flags);
|
||||
print_result("invalidate dcache():\t",
|
||||
total_size, invalidate_cost_time, repeat_cnt);
|
||||
print_result("clean dcache():\t", total_size, clean_cost_time,
|
||||
repeat_cnt);
|
||||
print_result("flush dcache():\t", total_size, flush_cost_time,
|
||||
repeat_cnt);
|
||||
leave_critical_section(irq);
|
||||
report_line(bytes, cost);
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
* Name: icache_speed_test
|
||||
* Name: cachespeed_common
|
||||
****************************************************************************/
|
||||
|
||||
static void icache_speed_test(FAR void *begin, size_t memset_size,
|
||||
size_t opt_size, uint32_t repeat_cnt)
|
||||
static void cachespeed_common(struct cachespeed_s *cs)
|
||||
{
|
||||
irqstate_t flags;
|
||||
int32_t cnt;
|
||||
uint32_t pt;
|
||||
uint32_t invalidate_cost_time = 0;
|
||||
|
||||
/* Accumulate the time to get the total time */
|
||||
|
||||
printf("______icache performance______\n");
|
||||
|
||||
printf("______do all operation______\n");
|
||||
flags = enter_critical_section();
|
||||
for (cnt = 0; cnt < repeat_cnt; cnt++)
|
||||
{
|
||||
uint32_t start_time;
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_invalidate_icache_all();
|
||||
invalidate_cost_time += get_time_elaps(start_time);
|
||||
}
|
||||
|
||||
leave_critical_section(flags);
|
||||
print_result("invalidate dcache():\t",
|
||||
memset_size * repeat_cnt, invalidate_cost_time, repeat_cnt);
|
||||
|
||||
for (pt = 32; pt <= opt_size; pt <<= 1)
|
||||
{
|
||||
const size_t total_size = pt * repeat_cnt;
|
||||
invalidate_cost_time = 0;
|
||||
if (pt < 1024)
|
||||
{
|
||||
printf("______do %" PRIu32 " B operation______\n", pt);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("______do %" PRIu32 " KB operation______\n", pt / 1024);
|
||||
}
|
||||
|
||||
flags = enter_critical_section();
|
||||
for (cnt = 0; cnt < repeat_cnt; cnt++)
|
||||
{
|
||||
uint32_t start_time;
|
||||
memset(begin, 0, memset_size);
|
||||
start_time = get_perf_time();
|
||||
up_invalidate_icache((uintptr_t)begin,
|
||||
(uintptr_t)((uint8_t *)begin + pt));
|
||||
invalidate_cost_time += get_time_elaps(start_time);
|
||||
}
|
||||
|
||||
leave_critical_section(flags);
|
||||
print_result("invalidate icache():\t",
|
||||
total_size, invalidate_cost_time, repeat_cnt);
|
||||
}
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
|
||||
up_invalidate_dcache, "dcache invalidate");
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
|
||||
up_invalidate_dcache, "dcache invalidate");
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
|
||||
up_clean_dcache, "dcache clean");
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
|
||||
up_clean_dcache, "dcache clean");
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 1,
|
||||
up_flush_dcache, "dcache flush");
|
||||
test_skeleton(cs, GET_DCACHE_SIZE, GET_DCACHE_LINE, 0,
|
||||
up_flush_dcache, "dcache flush");
|
||||
test_skeleton(cs, GET_ICACHE_SIZE, GET_ICACHE_LINE, 1,
|
||||
up_invalidate_icache, "icache invalidate");
|
||||
test_skeleton(cs, GET_ICACHE_SIZE, GET_ICACHE_LINE, 0,
|
||||
up_invalidate_icache, "icache invalidate");
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
@ -353,19 +240,14 @@ static void icache_speed_test(FAR void *begin, size_t memset_size,
|
||||
|
||||
int main(int argc, FAR char *argv[])
|
||||
{
|
||||
struct cachespeed_s cachespeed;
|
||||
|
||||
/* Setup defaults and parse the command line */
|
||||
|
||||
parse_commandline(argc, argv, &cachespeed);
|
||||
|
||||
/* Perform the dcache and icache test */
|
||||
|
||||
dcache_speed_test(cachespeed.begin, cachespeed.memset_size,
|
||||
cachespeed.opt_size, cachespeed.repeat_num);
|
||||
|
||||
icache_speed_test(cachespeed.begin, cachespeed.memset_size,
|
||||
cachespeed.opt_size, cachespeed.repeat_num);
|
||||
struct cachespeed_s cs =
|
||||
{
|
||||
.addr = 0,
|
||||
.alloc = 0
|
||||
};
|
||||
|
||||
setup(&cs);
|
||||
cachespeed_common(&cs);
|
||||
teardown(&cs);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user