nuttx/tools/parsememdump.py
buxiasen 1de538282e tools/parsememdump: speed up use mem addr cache and multi-thread
Signed-off-by: buxiasen <buxiasen@xiaomi.com>
2024-08-02 13:40:14 +08:00

234 lines
6.6 KiB
Python
Executable File

#!/usr/bin/env python3
# tools/parsememdump.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import argparse
import os
import re
from concurrent.futures import ThreadPoolExecutor
# Description text handed to argparse and printed verbatim by ``--help``
# (the parser uses RawTextHelpFormatter, so the line breaks are kept).
# It also names the column order expected in each memdump log line.
program_description = """
This program will help you analyze memdump log files,
analyze the number of occurrences of backtrace,
and output stack information
memdump log files need this format:
pid size seq addr mem
"""
class dump_line:
    """One parsed line of a memdump log (``pid size seq addr mem...``).

    Attributes set on a successfully parsed line:
        pid:  first decimal field of the line.
        size: second decimal field.
        seq:  third decimal field.
        addr: first ``0x`` value after the decimal fields (hex digits only,
              without the ``0x`` prefix).
        mem:  the remaining ``0x`` values, in order (also without prefix).
        cnt:  occurrence counter, starts at 1.
        err:  True when the line does not have the expected layout; in that
              case the attributes after the failing field are not set.
    """

    # A decimal field must have a space on both sides. The trailing space is
    # only looked at, not consumed, so it can double as the leading space of
    # the next field when columns are separated by a single space.
    _FIELD = re.compile(r" (\d+)(?= )")
    _HEX = re.compile(r"0x([0-9a-fA-F]+)")

    def __init__(self, line_str):
        self.mem = []
        self.err = False
        self.cnt = 1

        # Scan the three decimal fields left to right. ``pos`` is always an
        # offset into the full line, so every search resumes exactly where
        # the previous field ended.
        pos = 0
        for name in ("pid", "size", "seq"):
            found = self._FIELD.search(line_str, pos)
            if found is None:
                self.err = True
                return
            setattr(self, name, int(found.group(1)))
            pos = found.end()

        addrs = self._HEX.findall(line_str, pos)
        if not addrs:
            # Three numbers but no address: not a usable memdump record.
            self.err = True
            return
        self.addr = addrs[0]
        self.mem.extend(addrs[1:])
class log_output:
    """Text sink that writes to a file when one is requested, else to stdout.

    ``args`` is the parsed command line; only its ``output`` attribute is
    read. When it is truthy the named file is opened for writing and kept in
    ``self.file``; otherwise no ``file`` attribute is created.
    """

    def __init__(self, args):
        target = args.output
        if target:
            self.file = open(target, "w")

    def output(self, str):
        """Write ``str`` as-is (no newline is appended)."""
        sink = getattr(self, "file", None)
        if sink is None:
            print(str, end="")
            return
        sink.write(str)

    def __del__(self):
        # Close the output file if one was opened.
        sink = getattr(self, "file", None)
        if sink is not None:
            sink.close()
def compare_dump_line(dump_line_list, str):
    """Parse ``str`` and fold it into ``dump_line_list`` in place.

    Lines that fail to parse are dropped. A line with a non-empty ``mem``
    list that matches an existing entry on both ``mem`` and ``size`` bumps
    that entry's ``cnt``; every other line is appended as a new entry.
    """
    parsed = dump_line(str)
    if parsed.err:
        return

    # Entries without any mem values are never merged.
    if parsed.mem:
        for known in dump_line_list:
            if known.mem == parsed.mem and known.size == parsed.size:
                known.cnt += 1
                return

    dump_line_list.append(parsed)
def multi_thread_executer(cmd):
    """Run ``cmd`` and return its standard output as one string.

    Every output line (its newline included) is prefixed with a single
    space. The pipe is closed before returning, also when reading fails.

    NOTE(review): ``cmd`` goes through ``os.popen`` and therefore through the
    shell; callers must only build it from trusted values.
    """
    with os.popen(cmd, "r") as pipe:
        return "".join(f" {line}" for line in pipe)
class addr2line_db:
    """Cache mapping address strings to their ``addr2line`` output.

    All addresses are resolved once, in the constructor, by running
    ``<prefix>addr2line -Cfe <file> <addresses...>``; ``parse`` is then a
    plain dictionary lookup.

    Args:
        mem: list of address strings to resolve (default: none).
        ncpu: worker thread count for the lookup commands.
        prefix: text put directly in front of ``addr2line``.
        file: ELF file passed to ``addr2line -e``.
        batch_max: upper bound on the number of address segments, i.e. on
            the number of addr2line commands that are started.

    NOTE(review): with the default ``batch_max=1`` everything is resolved by
    a single command regardless of ``ncpu`` - confirm this is intended.
    """

    def __init__(self, mem=None, ncpu=1, prefix="", file="nuttx.elf", batch_max=1):
        # A fresh list per instance instead of one shared default list.
        self.mem = [] if mem is None else mem
        self.ncpu = ncpu
        self.db = {}
        self.prefix = prefix
        self.file = file
        self.batch_max = batch_max
        self.parse_all()

    def split_array(self, arr, num_splits):
        """Cut ``arr`` into ``num_splits`` consecutive slices.

        Slice lengths differ by at most one; the longer slices come first.
        ``num_splits`` must not be zero.
        """
        k, m = divmod(len(arr), num_splits)
        return [
            arr[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)]
            for i in range(num_splits)
        ]

    def parse_all(self):
        """Resolve every address in ``self.mem`` and fill ``self.db``."""
        if not self.mem:
            # Nothing to look up; avoid starting addr2line with no input.
            return

        # Tolerate ncpu of None/0 and keep at least one worker.
        workers = max(1, self.ncpu or 1)
        # Always use at least one segment, so fewer addresses than workers
        # (or a non-positive batch_max) no longer divides by zero.
        num_splits = max(1, min(len(self.mem) // workers, self.batch_max))

        segments = self.split_array(self.mem, num_splits)
        cmds = []
        for seg in segments:
            addrs = " ".join(seg)
            cmds.append(f"{self.prefix}addr2line -Cfe {self.file} {addrs}")

        with ThreadPoolExecutor(max_workers=workers) as executor:
            outputs = executor.map(multi_thread_executer, cmds)
            for keys, text in zip(segments, outputs):
                lines = text.split("\n")
                # addr2line -f prints two lines per address; pair them up.
                values = [
                    lines[i] + "\n" + lines[i + 1] + "\n"
                    for i in range(0, len(lines) - 1, 2)
                ]
                # zip() stops at the shorter side: when the command printed
                # less than expected (e.g. it failed) the affected addresses
                # simply stay unresolved instead of raising IndexError.
                self.db.update(zip(keys, values))

    def parse(self, mem):
        """Return the cached addr2line text for ``mem``, or "" if unknown."""
        return self.db.get(mem, "")
def main():
    """Command-line entry point.

    Reads the memdump log given with ``-f``, merges identical records,
    prints the memory used per pid and then every record with its count.
    When an ELF file is given with ``-e`` the mem addresses of each record
    are additionally resolved through addr2line and printed below it.
    """
    parser = argparse.ArgumentParser(
        description=program_description, formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("-f", "--file", help="dump file", nargs=1, required=True)
    # nargs=1 stores a one-element list, so the defaults are lists as well;
    # otherwise indexing [0] fails when the option is left out.
    parser.add_argument(
        "-p", "--prefix", help="addr2line program prefix", nargs=1, default=[""]
    )
    parser.add_argument(
        "-j",
        "--ncpu",
        help="multi thread count, default all",
        type=int,
        default=0,
        required=False,
    )
    parser.add_argument(
        "-e",
        "--elffile",
        default=[""],
        help="elf file,use it can output stack info",
        nargs=1,
    )
    parser.add_argument("-o", "--output", help="output file, default output shell")
    args = parser.parse_args()

    records = []
    with open("%s" % args.file[0], "r") as dump_file:
        for line in dump_file:
            compare_dump_line(records, line)
    records.sort(key=lambda x: x.cnt, reverse=True)

    log = log_output(args)
    try:
        # Bytes per pid: size of one record times its occurrence count.
        total_dir = {}
        for t in records:
            total_dir[t.pid] = total_dir.get(t.pid, 0) + t.size * t.cnt

        log.output("total memory used for ervey pid\n")
        log.output("pid total size\n")
        total_size = 0
        for pid, size in sorted(total_dir.items(), key=lambda x: x[1]):
            log.output("%-3d %-6d\n" % (pid, size))
            total_size += size
        log.output("all used memory %-6d\n" % (total_size))
        log.output("cnt size pid addr mem\n")

        # Unique addresses in first-seen order; dict keys give O(1) dedup.
        mems = list(dict.fromkeys(mem for t in records for mem in t.mem))

        # Symbol lookup is optional: it needs an ELF file and something to
        # resolve.
        elffile = args.elffile[0]
        db = None
        if elffile and mems:
            ncpu = args.ncpu or os.cpu_count() or 1
            # More workers than addresses is pointless; stay within 1..len.
            ncpu = max(1, min(ncpu, len(mems)))
            db = addr2line_db(
                mem=mems, ncpu=ncpu, prefix=args.prefix[0], file=elffile
            )

        for t in records:
            log.output("%-4d %-6d %-3d %s " % (t.cnt, t.size, t.pid, t.addr))
            if t.mem == []:
                log.output("\n")
                continue
            for mem in t.mem:
                log.output("%s " % mem)
            log.output("\n")
            if db is None:
                continue
            addr2line_str = "".join(db.parse(mem) for mem in t.mem)
            if addr2line_str != "":
                log.output(addr2line_str)
                log.output("\n")
    finally:
        # log_output only closes its file in __del__; call it explicitly so
        # the output is complete even when an error interrupts the report.
        log.__del__()


if __name__ == "__main__":
    main()