lint licensing

This commit is contained in:
Brennan Ashton 2021-04-04 17:09:12 -07:00 committed by Xiang Xiao
parent 6b4e7d0fe7
commit 3a97f0e5bd
2 changed files with 137 additions and 93 deletions

View File

@ -25,22 +25,30 @@ apache = r"""
* *
****************************************************************************/""" ****************************************************************************/"""
def apachize(path, header):
    """Print the file at *path* with its license header replaced by *header*.

    Every occurrence of the literal text ``PATH`` in *header* is replaced by
    the path of the file relative to the ``TOPDIR`` environment variable.
    Only the first ``/*** ... Copyright ... ***/`` comment in the file is
    replaced; the result is written to stdout and the file itself is left
    untouched.

    Raises KeyError if ``TOPDIR`` is not set and OSError if *path* cannot
    be read.
    """
    relpath = os.path.relpath(path, os.environ["TOPDIR"])

    # Plain text substitution: going through re.sub() would interpret
    # backslashes in the relative path as regex replacement escapes.
    header = header.replace("PATH", relpath)

    with open(path) as f:
        s = f.read()

    # A callable replacement inserts the header verbatim, without
    # group-reference / escape processing of its contents.
    s = re.sub(
        r"(?i)/\*\*\*.+?(?:Copyright).+?\*\*\*+/",
        lambda _match: header,
        s,
        count=1,
        flags=re.DOTALL,
    )
    print(s)
# Script entry point: expects exactly one argument, the file to convert.
if len(sys.argv) != 2:
    print("Usage: ./apachize.py <file>", file=sys.stderr)
    print(
        "This will replace the license header of the passed file to that of Apache 2.0 and print it to stdout",
        file=sys.stderr,
    )
    sys.exit(2)

# apachize() needs TOPDIR to compute the path written into the header.
if "TOPDIR" not in os.environ:
    print(
        "Please define the TOPDIR environment variable to the full path to nuttx/ root",
        file=sys.stderr,
    )
    sys.exit(2)

apachize(sys.argv[1], apache)

View File

@ -34,146 +34,167 @@ author_mappings_json = None
verbose_level = 0 verbose_level = 0
color = True color = True
def colored(s, c):
    """Return *s* coloured with *c* via termcolor, or *s* unchanged.

    Colouring is applied only while the module-level ``color`` flag is true.
    """
    return termcolor.colored(s, c) if color else s
# Compiled once at import time instead of on every call.
_ATTRIBUTION_RE = re.compile(r"(?i)(?:by|from|author|Co-authored-by):? +(.+)")


def commit_attributions(c):
    """Return the attribution strings found in a commit's message and body.

    *c* is a commit dictionary with ``message`` and ``body`` entries. Each
    result is the rest of the line following a "by", "from", "author" or
    "Co-authored-by" marker (case-insensitive). Matches from the message
    come first, then those from the body.

    A missing, ``None`` or empty entry contributes no attributions instead
    of raising.
    """
    found = []
    for field in ("message", "body"):
        text = c.get(field)
        if text:
            found.extend(_ATTRIBUTION_RE.findall(text))
    return found
def get_headers(s):
    """Return every ``/*** ... Copyright ... ***/`` comment block found in *s*.

    The match is case-insensitive and spans newlines. Each block runs from
    a ``/***`` opener to the first ``***/`` closer that follows the word
    "Copyright". An empty list is returned when nothing matches.
    """
    # Raw string: "\*" is not a valid escape in a plain string literal.
    return re.findall(r"(?i)/\*\*\*.+?(?:Copyright).+?\*\*\*+/", s, re.DOTALL)
def get_file(blob):
    """Return the text of git object *blob*, or None if git cannot provide it.

    Runs ``git cat-file -p <blob>`` with stderr discarded. A non-zero exit
    status yields None. Bytes that are not valid UTF-8 are replaced rather
    than raising, so a blob in a legacy encoding does not abort the caller.
    """
    try:
        raw = subprocess.check_output(
            ["git", "cat-file", "-p", blob], stderr=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        return None
    return raw.decode(errors="replace")
def header_authors(header):
    """Return the author entries listed in a license *header*.

    Looks for ``Author:`` / ``Authors:`` sections that end at the word
    "Redistribution", splits each section into its comment lines, removes
    any "Copyright (C) <year>" prefix from an entry and drops entries that
    end up empty.
    """
    sections = re.findall(
        r"[Aa]uthors?: +(.+?) *(?:Redistribution)", header, re.DOTALL
    )

    # One entry per comment line; flattened in a single pass rather than
    # with sum(lists, []), which copies the accumulator at every step.
    entries = [
        entry for section in sections for entry in re.split(r"\n[ *]+", section)
    ]

    cleaned = [
        re.sub(r"[Cc]opyright:?( ?.[Cc].)? *([12][0-9]{3}[,-]? ?)", "", entry)
        for entry in entries
    ]
    return [entry for entry in cleaned if entry != ""]
def search_for_cla(name):
    """Look *name* up in the Apache committers / non-committers data.

    Returns ``(apacheID, name)`` when *name* equals an entry of
    ``committers_json["committers"]``, ``(None, name)`` when it is listed in
    ``non_committers_json["non_committers"]`` (no Apache ID available), and
    None when neither matches.
    """
    committers = committers_json["committers"]
    hit = next(
        (
            (apache_id, full_name)
            for apache_id, full_name in committers.items()
            if full_name == name
        ),
        None,
    )
    if hit is not None:
        return hit

    return (None, name) if name in non_committers_json["non_committers"] else None
def author_has_cla(author):
    """Check whether *author* is known to the CLA data.

    Returns the same value as search_for_cla(). *author* is either a plain
    name or a name followed by an email address (optionally wrapped in
    ``<>``). When the name itself is not found, the email is looked up in
    ``author_mappings_json`` and the mapped name is searched instead.
    Returns None when nothing matches, or when the text contains an ``@``
    but does not look like "name email".
    """
    if "@" in author:
        matches = re.match(r"^(.+?)(?: +([^ ]+@[^ ]+ *))$", author)
        if not matches:
            # found an '@' but it wasn't an email, so this is most likely
            # not really an author
            return None
        name = matches.group(1).strip()
        # The capture can end in whitespace; strip it first, otherwise the
        # closing '>' is not at the end and rstrip(">") leaves it in place.
        email = matches.group(2).strip().lstrip("<").rstrip(">")
    else:
        name = author.strip()
        email = None

    vvvprint("name: %s email: %s" % (name, email if email else "?"))

    # first look for name directly
    result = search_for_cla(name)
    if result:
        return result

    # otherwise, try the alternative name mapped to this email
    if email and (email in author_mappings_json):
        result = search_for_cla(author_mappings_json[email])
        if result:
            return result

    # Nothing matched
    return None
def header_copyrights(header):
    """Return the copyright holders named in a license *header*.

    For every `` * Copyright [(C)] [years] <holder>`` comment line the text
    after the optional "(C)" marker and year list is taken, and an
    "All rights reserved" suffix is removed from it.
    """
    # Raw string: "\*" is not a valid escape in a plain string literal.
    holders = re.findall(
        r" \* *[Cc]opyright:?(?: ?.[Cc].)? *(?:[12][0-9]{3}[,-]? ?)* *(.+)", header
    )
    return [re.sub(r"(. )?[Aa]ll rights reserved.?", "", holder) for holder in holders]
def report_cla(author):
    """Print one report line for *author* with a coloured CLA status marker.

    The marker is green when author_has_cla() finds the author and red
    otherwise. The Apache ID is appended when one is known.
    """
    found = author_has_cla(author)
    apache_id = found[0] if found else None

    # NOTE(review): the marker text is an empty string in this view; it may
    # have been a glyph lost in extraction - confirm against the repository.
    print(colored("", "green" if found else "red"), end=" ")

    print("%s (ID: %s)" % (author, apache_id) if apache_id else author)
def analyze(j): def analyze(j):
complete_attributions = set() complete_attributions = set()
complete_authors = set() complete_authors = set()
complete_copyrights = set() complete_copyrights = set()
vprint('file has %i commits' % len(j)) vprint("file has %i commits" % len(j))
for commit in j: for commit in j:
authors = set() authors = set()
vprint(colored('-','yellow')) vprint(colored("-", "yellow"))
vprint(colored('commit: ', 'green') + commit['commit']) vprint(colored("commit: ", "green") + commit["commit"])
vprint(colored('blob: ', 'green') + commit['blob']) vprint(colored("blob: ", "green") + commit["blob"])
vprint(colored('date: ','green') + commit['date']) vprint(colored("date: ", "green") + commit["date"])
vprint(colored('author: ','green') + ('%s <%s>' % (commit['author'], commit['author-email']))) vprint(
colored("author: ", "green")
+ ("%s <%s>" % (commit["author"], commit["author-email"]))
)
attributions = commit_attributions(commit) attributions = commit_attributions(commit)
if (len(attributions) > 0): if len(attributions) > 0:
vprint(colored('attributions:','green')) vprint(colored("attributions:", "green"))
for attribution in attributions: for attribution in attributions:
vprint(attribution) vprint(attribution)
complete_attributions |= set(attributions) complete_attributions |= set(attributions)
complete_authors |= set([commit['author'] + ' ' + commit['author-email']]) complete_authors |= set([commit["author"] + " " + commit["author-email"]])
# skip deletion commits # skip deletion commits
vprint(colored('blob:','green'), end = ' ') vprint(colored("blob:", "green"), end=" ")
if (commit['blob'] == '0000000000000000000000000000000000000000'): if commit["blob"] == "0000000000000000000000000000000000000000":
vprint('zero (deletion)') vprint("zero (deletion)")
continue continue
file_contents = get_file(commit['blob']) file_contents = get_file(commit["blob"])
# skip inaccessible blobs (probably lived in a submodule) # skip inaccessible blobs (probably lived in a submodule)
if not file_contents:
if (not file_contents): vprint("inaccessible")
vprint('inaccessible')
continue continue
else: else:
vprint('available') vprint("available")
headers = get_headers(file_contents) headers = get_headers(file_contents)
vprint(colored('header authors:','green')) vprint(colored("header authors:", "green"))
for header in headers: for header in headers:
ha = header_authors(header) ha = header_authors(header)
authors |= set(ha) authors |= set(ha)
@ -181,77 +202,92 @@ def analyze(j):
complete_authors |= set(authors) complete_authors |= set(authors)
vprint(colored('header copyrights:','green')) vprint(colored("header copyrights:", "green"))
copyrights = set() copyrights = set()
for header in headers: for header in headers:
hc = header_copyrights(header) hc = header_copyrights(header)
copyrights |= set(hc) copyrights |= set(hc)
vprint(hc) vprint(hc)
vprint(colored('commit description:','green')) vprint(colored("commit description:", "green"))
vprint(commit['message']) vprint(commit["message"])
if commit['body']: if commit["body"]:
vprint(colored('commit msg body:','green')) vprint(colored("commit msg body:", "green"))
vprint(commit['body']) vprint(commit["body"])
vvprint(colored('headers:','green')) vvprint(colored("headers:", "green"))
for header in headers: for header in headers:
vvprint(header) vvprint(header)
complete_copyrights |= copyrights complete_copyrights |= copyrights
vprint(colored("----\n",'yellow')) vprint(colored("----\n", "yellow"))
print(colored("COMPLETE REPORT:",'blue')) print(colored("COMPLETE REPORT:", "blue"))
print(colored("attributions:",'green')) print(colored("attributions:", "green"))
if (len(complete_attributions) == 0): if len(complete_attributions) == 0:
print("*none detected*") print("*none detected*")
else: else:
for attribution in complete_attributions: for attribution in complete_attributions:
report_cla(attribution) report_cla(attribution)
print(colored("authors:",'green')) print(colored("authors:", "green"))
for author in complete_authors: for author in complete_authors:
report_cla(author) report_cla(author)
print(colored("copyrights:",'green')) print(colored("copyrights:", "green"))
print('\n'.join(complete_copyrights)) print("\n".join(complete_copyrights))
def print_help():
    """Print the command-line usage summary to stdout.

    Lists every option the script handles, including ``-h`` and the ``-``
    form of the file argument that reads the JSON from standard input.
    """
    print("Usage: check.py [-h] [-v] [-n] <JSON file>\n")
    print(
        " -h\tShow this help and exit\n"
        " -v\tIncrease verbosity (add up to three times)\n"
        " -n\tDo not use color for output\n"
        "\n"
        " Use '-' as <JSON file> to read the JSON from standard input"
    )
def vprint(*args, **kwargs):
    """Forward to print() only when ``verbose_level`` is greater than 0."""
    if not (verbose_level > 0):
        return
    print(*args, **kwargs)
def vvprint(*args, **kwargs):
    """Forward to print() only when ``verbose_level`` is greater than 1."""
    if not (verbose_level > 1):
        return
    print(*args, **kwargs)
def vvvprint(*args, **kwargs):
    """Forward to print() only when ``verbose_level`` is greater than 2."""
    if not (verbose_level > 2):
        return
    print(*args, **kwargs)
#####

# The JSON data files are expected next to this script.
_script_dir = os.path.dirname(os.path.abspath(__file__))

# First try to load the CLAs JSONs:

try:
    with open(os.path.join(_script_dir, "icla-info.json"), "r") as file:
        committers_json = json.load(file)

    with open(os.path.join(_script_dir, "icla-info_noid.json"), "r") as file:
        non_committers_json = json.load(file)
except (OSError, ValueError):
    # OSError: file missing or unreadable; ValueError: not valid JSON
    # (json.JSONDecodeError is a ValueError). A bare "except:" would also
    # swallow KeyboardInterrupt and SystemExit.
    print(
        "Could not open CLA JSON files, please read README.md for download instructions"
    )
    sys.exit(2)

# Open author mappings JSON

with open(os.path.join(_script_dir, "author_mappings.json"), "r") as file:
    author_mappings_json = json.load(file)
try: try:
@ -260,7 +296,7 @@ except getopt.GetoptError:
print_help() print_help()
sys.exit(2) sys.exit(2)
for opt, arg in opts: for opt, arg in opts:
if (opt == "-h"): if opt == "-h":
print_help() print_help()
sys.exit() sys.exit()
elif opt == "-v": elif opt == "-v":
@ -268,7 +304,7 @@ for opt, arg in opts:
elif opt == "-n": elif opt == "-n":
color = False color = False
if (len(args) != 1): if len(args) != 1:
print_help() print_help()
sys.exit(2) sys.exit(2)
@ -278,10 +314,10 @@ if not f:
print_help() print_help()
sys.exit(2) sys.exit(2)
if (f == '-'): if f == "-":
j = json.load(sys.stdin) j = json.load(sys.stdin)
else: else:
with open(f, 'r') as file: with open(f, "r") as file:
j = json.load(file) j = json.load(file)
analyze(j) analyze(j)