circuitpython/py/makeqstrdefs.py
2023-08-14 00:59:22 -04:00

280 lines
8.2 KiB
Python

"""
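This script processes the output from the C preprocessor and extracts all
qstrs. Each qstr is transformed into a qstr definition of the form 'Q(...)'.
Depending on the selected mode it can instead extract
MP_COMPRESSED_ROM_TEXT(...) or MP_REGISTER_MODULE(...) macros.
This script works with Python 3.x.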
"""
from __future__ import print_function
import io
import os
import re
import subprocess
import sys
import multiprocessing, multiprocessing.dummy
from html.entities import name2codepoint
# Qstrings listed here are handled specially by later processing stages,
# so this script never emits them.
QSTRING_BLOCK_LIST = {"NULL", "number_of"}

# Extra escape names for characters that have no HTML entity name.
# Insertion order is kept identical to the historical one because
# qstr_unescape() walks the table in order.
name2codepoint.update(
    (escape_name, ord(character))
    for escape_name, character in (
        ("hyphen", "-"),
        ("space", " "),
        ("squot", "'"),
        ("comma", ","),
        ("dot", "."),
        ("colon", ":"),
        ("semicolon", ";"),
        ("slash", "/"),
        ("percent", "%"),
        ("hash", "#"),
        ("paren_open", "("),
        ("paren_close", ")"),
        ("bracket_open", "["),
        ("bracket_close", "]"),
        ("brace_open", "{"),
        ("brace_close", "}"),
        ("star", "*"),
        ("bang", "!"),
        ("backslash", "\\"),
        ("plus", "+"),
        ("dollar", "$"),
        ("equals", "="),
        ("question", "?"),
        ("at_sign", "@"),
        ("caret", "^"),
        ("pipe", "|"),
        ("tilde", "~"),
    )
)

# These entity names are just vexing (presumably because they are common
# words inside identifiers -- TODO confirm), so they are removed.
del name2codepoint["and"], name2codepoint["or"], name2codepoint["not"]

# Mode: extract MP_QSTR_FOO macros.
_MODE_QSTR = "qstr"

# Mode: extract MP_COMPRESSED_ROM_TEXT("") macros (which come from MP_ERROR_TEXT).
_MODE_COMPRESS = "compress"

# Mode: extract MP_REGISTER_MODULE(...) macros.
_MODE_MODULE = "module"
def is_c_source(fname):
    """Return True when the extension of *fname* is exactly ``.c``."""
    _, extension = os.path.splitext(fname)
    return extension == ".c"
def is_cxx_source(fname):
    """Return True when *fname* has one of the recognised C++ source extensions.

    The comparison is case-sensitive: ``.CPP`` and ``.C`` are accepted,
    ``.Cpp`` and ``.c`` are not.
    """
    cxx_extensions = (".cc", ".cp", ".cxx", ".cpp", ".CPP", ".c++", ".C")
    suffix = os.path.splitext(fname)[1]
    return suffix in cxx_extensions
def preprocess():
    """Run the preprocessor over the selected sources and save the output.

    Reads its configuration from the module-level ``args`` object
    (``pp``, ``output``, ``cflags``, ``cxxflags``, ``sources``,
    ``changed_sources`` and ``dependencies``, each a list of strings).

    Source selection:
      * if any changed source is also listed in ``dependencies``, every
        source is preprocessed;
      * otherwise, if ``changed_sources`` contains a non-empty entry, only
        the changed sources are preprocessed;
      * otherwise every source is preprocessed.

    C and C++ sources are preprocessed separately (with ``cflags`` and
    ``cxxflags`` respectively), split into batches that run concurrently on
    a thread pool, and the raw preprocessor output is written to
    ``args.output[0]`` in batch order.

    Raises:
        subprocess.CalledProcessError: if a preprocessor invocation fails.
    """
    if any(src in args.dependencies for src in args.changed_sources):
        sources = args.sources
    elif any(args.changed_sources):
        sources = args.changed_sources
    else:
        sources = args.sources

    csources = []
    cxxsources = []
    for source in sources:
        if is_cxx_source(source):
            cxxsources.append(source)
        elif is_c_source(source):
            csources.append(source)

    # Best effort: the directory may already exist, or the output path may
    # have no directory component at all.
    try:
        os.makedirs(os.path.dirname(args.output[0]))
    except OSError:
        pass

    def pp(flags):
        # Bind the flag set so the pool only has to pass the file batch.
        def run(files):
            return subprocess.check_output(args.pp + flags + files)

        return run

    try:
        cpus = multiprocessing.cpu_count()
    except NotImplementedError:
        cpus = 1

    # Both the thread pool and the output file are context-managed so they are
    # released even when a preprocessor invocation or the open() itself fails.
    with multiprocessing.dummy.Pool(cpus) as pool, open(args.output[0], "wb") as out_file:
        for flags, group in (
            (args.cflags, csources),
            (args.cxxflags, cxxsources),
        ):
            # Ceiling division: spread the files over at most `cpus` batches.
            batch_size = (len(group) + cpus - 1) // cpus
            # `batch_size or 1` keeps range() valid when the group is empty.
            chunks = [group[i : i + batch_size] for i in range(0, len(group), batch_size or 1)]
            # imap() yields results in submission order, keeping output stable.
            for output in pool.imap(pp(flags), chunks):
                out_file.write(output)
def write_out(fname, output):
    """Write the entries collected for one source file to the output directory.

    Nothing is written when *output* is empty. Otherwise path separators,
    colons and ``..`` in *fname* are replaced so that the name is flat, and
    the entries are written one per line to
    ``<args.output_dir>/<flattened fname>.<args.mode>``.
    """
    if not output:
        return

    flat_name = fname
    for needle, substitute in (("/", "__"), ("\\", "__"), (":", "@"), ("..", "@@")):
        flat_name = flat_name.replace(needle, substitute)

    target_path = args.output_dir + "/" + flat_name + "." + args.mode
    with open(target_path, "w") as handle:
        handle.write("\n".join(output) + "\n")
def qstr_unescape(qstr):
    """Expand ``_name_`` escapes in *qstr* into the characters they stand for.

    Every name in ``name2codepoint`` is tried in table order. A name that
    occurs in double-underscore form (``__name__``) anywhere in the string is
    left alone entirely; otherwise each ``_name_`` occurrence is replaced by
    the corresponding character.
    """
    for entity, codepoint in name2codepoint.items():
        if "__" + entity + "__" not in qstr:
            qstr = qstr.replace("_" + entity + "_", chr(codepoint))
    return qstr
def process_file(f):
    """Split preprocessor output into per-source-file lists of extracted items.

    *f* is an iterable of text lines (preprocessor output). Line markers of
    the form ``# n "file"`` (gcc-like) or ``#line n "file"`` (msvc-like) that
    name a C or C++ source file switch the "current file"; markers naming any
    other file (for example headers) are skipped, so content that follows
    them stays attributed to the current source file.

    What is extracted from the remaining lines depends on ``args.mode``:
      * qstr mode: every ``MP_QSTR_xxx`` becomes ``Q(xxx)`` after unescaping,
        unless ``xxx`` is in ``QSTRING_BLOCK_LIST``;
      * compress mode: the string inside ``MP_COMPRESSED_ROM_TEXT("...")``;
      * module mode: the whole ``MP_REGISTER_MODULE(...);`` statement.
    In every mode, ``translate("...")`` calls are additionally recorded as
    ``TRANSLATE("...")``.

    Each file's items are handed to ``write_out()`` when the current file
    changes and once more at the end of input.

    Returns:
        Always the empty string.

    Raises:
        ValueError: if ``args.mode`` is not one of the known modes.
    """
    # `(?:line)?` matches the literal word; the former `[line]*` was a
    # character class that also accepted nonsense such as "#nile".
    re_line = re.compile(r"#(?:line)?\s\d+\s\"([^\"]+)\"")
    if args.mode == _MODE_QSTR:
        re_match = re.compile(r"MP_QSTR_[_a-zA-Z0-9]+")
    elif args.mode == _MODE_COMPRESS:
        re_match = re.compile(r'MP_COMPRESSED_ROM_TEXT\("([^"]*)"\)')
    elif args.mode == _MODE_MODULE:
        re_match = re.compile(r"MP_REGISTER_MODULE\(.*?,\s*.*?\);")
    else:
        # Fail early with a clear message instead of a NameError further down.
        raise ValueError("unrecognised mode: %s" % args.mode)
    re_translate = re.compile(r"translate\(\"((?:(?=(\\?))\2.)*?)\"\)")
    qstr_mode = args.mode == _MODE_QSTR

    output = []
    last_fname = None
    for line in f:
        if line.isspace():
            continue
        # match gcc-like output (# n "file") and msvc-like output (#line n "file")
        if line.startswith(("# ", "#line")):
            m = re_line.match(line)
            assert m is not None, "malformed line marker: %r" % line
            fname = m.group(1)
            # Use the same extension rules as preprocess() so that every
            # source file that was preprocessed is also recognised here.
            if not (is_c_source(fname) or is_cxx_source(fname)):
                continue
            if fname != last_fname:
                write_out(last_fname, output)
                output = []
                last_fname = fname
            continue
        for match in re_match.findall(line):
            if qstr_mode:
                name = match.replace("MP_QSTR_", "")
                if name not in QSTRING_BLOCK_LIST:
                    output.append("Q(" + qstr_unescape(name) + ")")
            else:
                # Compress and module modes record the match verbatim.
                output.append(match)
        for match in re_translate.findall(line):
            # The pattern has two groups; group 0 is the string contents.
            output.append('TRANSLATE("' + match[0] + '")')

    if last_fname:
        write_out(last_fname, output)
    return ""
def cat_together():
    """Merge all per-file outputs for the current mode into one sorted file.

    Reads every ``<args.output_dir>/*.<args.mode>`` file, sorts all lines,
    writes them to ``<args.output_dir>/out`` and computes an MD5 digest of the
    result. The digest is compared with the one stored in
    ``<args.output_file>.hash``; only when it differs is ``args.output_file``
    replaced by the new file and the stored digest updated. A short status
    message is printed either way.
    """
    import glob
    import hashlib

    all_lines = []
    for fname in glob.glob(args.output_dir + "/*." + args.mode):
        with open(fname, "rb") as f:
            all_lines += f.readlines()
    all_lines.sort()
    # readlines() keeps each line's own terminator, so joining with b"\n"
    # leaves a blank line between entries. This is kept as-is so that the
    # generated file and its hash stay unchanged.
    all_lines = b"\n".join(all_lines)

    merged_path = args.output_dir + "/out"
    with open(merged_path, "wb") as outf:
        outf.write(all_lines)

    # MD5 is used purely for change detection here.
    hasher = hashlib.md5()
    hasher.update(all_lines)
    new_hash = hasher.hexdigest()

    old_hash = None
    try:
        with open(args.output_file + ".hash") as f:
            old_hash = f.read()
    except IOError:
        # No previous hash (first run): treat the output as changed.
        pass

    mode_full = "QSTR"
    if args.mode == _MODE_COMPRESS:
        mode_full = "Compressed data"
    elif args.mode == _MODE_MODULE:
        mode_full = "Module registrations"

    if old_hash != new_hash:
        print(mode_full, "updated")
        # os.replace() overwrites an existing destination on every platform,
        # so no separate (and error-swallowing) os.remove() is required.
        os.replace(merged_path, args.output_file)
        with open(args.output_file + ".hash", "w") as f:
            f.write(new_hash)
    else:
        print(mode_full, "not updated")
if __name__ == "__main__":
    # Every invocation (including "pp") must supply at least five arguments.
    if len(sys.argv) < 6:
        print("usage: %s command mode input_filename output_dir output_file" % sys.argv[0])
        sys.exit(2)

    class Args:
        pass

    # Module-level namespace that the functions in this file read from.
    args = Args()
    args.command = sys.argv[1]

    if args.command == "pp":
        # "pp" takes keyword-delimited lists: each keyword starts a new list
        # and the arguments that follow are appended to it. The command name
        # "pp" itself is the first keyword.
        keywords = (
            "pp",
            "output",
            "cflags",
            "cxxflags",
            "sources",
            "changed_sources",
            "dependencies",
        )
        named_args = {keyword: [] for keyword in keywords}

        for token in sys.argv[1:]:
            if token in named_args:
                current_tok = token
                continue
            named_args[current_tok].append(token)

        if not named_args["pp"] or len(named_args["output"]) != 1:
            print("usage: %s %s ..." % (sys.argv[0], " ... ".join(named_args)))
            sys.exit(2)

        for key in named_args:
            setattr(args, key, named_args[key])

        preprocess()
        sys.exit(0)

    # Positional arguments for the "split" and "cat" commands.
    # input_filename is unused for command=cat.
    args.mode, args.input_filename, args.output_dir = sys.argv[2:5]
    # output_file is unused for command=split.
    if len(sys.argv) == 5:
        args.output_file = None
    else:
        args.output_file = sys.argv[5]

    if args.mode not in (_MODE_QSTR, _MODE_COMPRESS, _MODE_MODULE):
        print("error: mode %s unrecognised" % sys.argv[2])
        sys.exit(2)

    # Best effort: the output directory may already exist.
    try:
        os.makedirs(args.output_dir)
    except OSError:
        pass

    if args.command == "split":
        with io.open(args.input_filename, encoding="utf-8") as infile:
            process_file(infile)
    elif args.command == "cat":
        cat_together()