From 1de538282ec4c25bea8721770feb3b96a3f25a27 Mon Sep 17 00:00:00 2001 From: buxiasen Date: Wed, 31 Jul 2024 18:04:58 +0800 Subject: [PATCH] tools/parsememdump: speed up use mem addr cache and multi-thread Signed-off-by: buxiasen --- tools/parsememdump.py | 131 ++++++++++++++++++++++++++++++++---------- 1 file changed, 101 insertions(+), 30 deletions(-) diff --git a/tools/parsememdump.py b/tools/parsememdump.py index c807b0253c..aabcae21ac 100755 --- a/tools/parsememdump.py +++ b/tools/parsememdump.py @@ -19,6 +19,7 @@ import argparse import os import re +from concurrent.futures import ThreadPoolExecutor program_description = """ This program will help you analyze memdump log files, @@ -32,28 +33,28 @@ pid size seq addr mem class dump_line: def __init__(self, line_str): self.mem = [] - self.err = 0 + self.err = False self.cnt = 1 tmp = re.search("( \d+ )", line_str) if tmp is None: - self.err = 1 + self.err = True return self.pid = int(tmp.group(0)[1:]) tmp = re.search("( \d+ )", line_str[tmp.span()[1] :]) if tmp is None: - self.err = 1 + self.err = True return self.size = int(tmp.group(0)[1:]) tmp = re.search("( \d+ )", line_str[tmp.span()[1] :]) if tmp is None: - self.err = 1 + self.err = True return self.seq = int(tmp.group(0)[1:]) tmp = re.findall("0x([0-9a-fA-F]+)", line_str[tmp.span()[1] :]) self.addr = tmp[0] - for str in tmp[1:]: - self.mem.append(str) + for s in tmp[1:]: + self.mem.append(s) class log_output: @@ -82,16 +83,72 @@ def compare_dump_line(dump_line_list, str): return find = 0 - for tmp in dump_line_list: - if tmp.mem == t.mem and tmp.size == t.size and t.mem != []: + for line in dump_line_list: + if line.mem == t.mem and line.size == t.size and t.mem != []: find = 1 - tmp.cnt += 1 + line.cnt += 1 break if find == 0: dump_line_list.append(t) +def multi_thread_executer(cmd): + result = "" + p = os.popen(cmd, "r") + while True: + line = p.readline() + if line == "": + break + result += f" {line}" + return result + + +class addr2line_db: + def __init__(self, mem=[], ncpu=1, prefix="", file="nuttx.elf", batch_max=1): + self.mem = mem + self.ncpu = ncpu + self.db = {} + self.prefix = prefix + self.file = file + self.batch_max = batch_max + self.parse_all() + + def split_array(self, arr, num_splits): + k, m = divmod(len(arr), num_splits) + return [ + arr[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] + for i in range(num_splits) + ] + + def parse_all(self): + cmds = [] + batch_cnt = len(self.mem) // self.ncpu + if batch_cnt > self.batch_max: + batch_cnt = self.batch_max + segments = self.split_array(self.mem, batch_cnt) + + for seg in segments: + addrs = " ".join(seg) + cmds.append(f"{self.prefix}addr2line -Cfe {self.file} {addrs}") + + with ThreadPoolExecutor(max_workers=self.ncpu) as executor: + for keys, v in zip(segments, executor.map(multi_thread_executer, cmds)): + lines = v.split("\n") + values = [ + lines[i] + "\n" + lines[i + 1] + "\n" + for i in range(0, len(lines) - 1, 2) + ] + for i in range(len(keys)): + self.db[keys[i]] = values[i] + + def parse(self, mem): + if mem in self.db.keys(): + return self.db[mem] + else: + return "" + + if __name__ == "__main__": parser = argparse.ArgumentParser( description=program_description, formatter_class=argparse.RawTextHelpFormatter @@ -100,7 +157,14 @@ if __name__ == "__main__": parser.add_argument( "-p", "--prefix", help="addr2line program prefix", nargs=1, default="" ) - + parser.add_argument( + "-j", + "--ncpu", + help="multi thread count, default all", + type=int, + default=0, + required=False, + ) parser.add_argument( "-e", "--elffile", @@ -109,22 +173,22 @@ if __name__ == "__main__": nargs=1, ) - parser.add_argument("-o", "--output", help="output file,default output shell") + parser.add_argument("-o", "--output", help="output file, default output shell") args = parser.parse_args() dump_file = open("%s" % args.file[0], "r") - list = [] + lines = [] while 1: - str = dump_file.readline() - if str == "": + line = dump_file.readline() + if line == "": break - compare_dump_line(list, str) + compare_dump_line(lines, line) dump_file.close() - list.sort(key=lambda x: x.cnt, reverse=True) + lines.sort(key=lambda x: x.cnt, reverse=True) log = log_output(args) total_dir = {} - for t in list: + for t in lines: if t.pid in total_dir: total_dir[t.pid] += t.size * t.cnt else: @@ -139,24 +203,31 @@ if __name__ == "__main__": log.output("all used memory %-6d\n" % (total_size)) log.output("cnt size pid addr mem\n") - for t in list: - memstr = "" + + mems = [] + for line in lines: + if line.mem == []: + continue + for mem in line.mem: + if mem not in mems: + mems.append(mem) + + ncpu = args.ncpu + if ncpu == 0: + ncpu = os.cpu_count() + + db = addr2line_db(mem=mems, ncpu=ncpu, prefix=args.prefix[0], file=args.elffile[0]) + for t in lines: + addr2line_str = "" log.output("%-4d %-6d %-3d %s " % (t.cnt, t.size, t.pid, t.addr)) if t.mem == []: log.output("\n") continue for mem in t.mem: log.output("%s " % mem) - memstr += mem + " " + addr2line_str += db.parse(mem) + log.output("\n") + if addr2line_str != "": + log.output(addr2line_str) log.output("\n") - if args.elffile != "": - addr2line_file = os.popen( - "%saddr2line -Cfe %s %s" % (args.prefix[0], args.elffile[0], memstr), "r" - ) - while 1: - add2line_str = addr2line_file.readline() - if add2line_str == "": - break - log.output(" " + add2line_str) - log.output("\n" + add2line_str) log.__del__()