2016-03-11 16:12:59 +00:00
|
|
|
"""
|
|
|
|
This script processes the output from the C preprocessor and extracts all
|
|
|
|
qstr. Each qstr is transformed into a qstr definition of the form 'Q(...)'.
|
|
|
|
|
|
|
|
This script works with Python 2.6, 2.7, 3.3 and 3.4.
|
|
|
|
"""
|
|
|
|
|
2017-06-09 13:42:13 +10:00
|
|
|
from __future__ import print_function
|
|
|
|
|
2021-04-22 17:55:39 -07:00
|
|
|
import io
|
2016-03-11 16:12:59 +00:00
|
|
|
import os
|
|
|
|
import re
|
2020-10-08 16:40:17 +02:00
|
|
|
import subprocess
|
2017-06-09 13:42:13 +10:00
|
|
|
import sys
|
2020-10-29 16:38:13 +11:00
|
|
|
import multiprocessing, multiprocessing.dummy
|
2016-03-11 16:12:59 +00:00
|
|
|
|
2016-09-19 13:00:44 -07:00
|
|
|
# Python 2/3 compatibility:
#   - iterating through bytes is different
#   - name2codepoint lives in a different module
#   - unichr() is not a builtin on Python 3
import platform

# Probe the interpreter's major version once instead of once per branch.
_PY_MAJOR = platform.python_version_tuple()[0]

if _PY_MAJOR == "2":

    def bytes_cons(val, enc=None):
        """Return bytearray(val); ``enc`` is accepted for call compatibility and ignored."""
        return bytearray(val)

    from htmlentitydefs import name2codepoint
elif _PY_MAJOR == "3":
    bytes_cons = bytes
    from html.entities import name2codepoint

    unichr = chr
else:
    # Fail here with a clear message rather than with a NameError further
    # down when name2codepoint is first used.
    raise RuntimeError("unsupported Python version: " + platform.python_version())
# end compatibility code
|
|
|
|
|
2021-05-04 11:40:55 -07:00
|
|
|
# Blocklist of qstrings that are specially handled in further
# processing and should be ignored.
# (set([...]) rather than a set literal keeps Python 2.6 able to parse this.)
QSTRING_BLOCK_LIST = set(["NULL", "number_of"])
|
2016-03-11 16:12:59 +00:00
|
|
|
|
2016-09-19 13:00:44 -07:00
|
|
|
# Work on a private copy of the table: the dict imported above is owned by the
# standard-library module, and editing it in place would change it for every
# other user of that module in the same process.
name2codepoint = dict(name2codepoint)

# add some custom names to map characters that aren't in HTML
name2codepoint.update(
    {
        "hyphen": ord("-"),
        "space": ord(" "),
        "squot": ord("'"),
        "comma": ord(","),
        "dot": ord("."),
        "colon": ord(":"),
        "semicolon": ord(";"),
        "slash": ord("/"),
        "percent": ord("%"),
        "hash": ord("#"),
        "paren_open": ord("("),
        "paren_close": ord(")"),
        "bracket_open": ord("["),
        "bracket_close": ord("]"),
        "brace_open": ord("{"),
        "brace_close": ord("}"),
        "star": ord("*"),
        "bang": ord("!"),
        "backslash": ord("\\"),
        "plus": ord("+"),
        "dollar": ord("$"),
        "equals": ord("="),
        "question": ord("?"),
        "at_sign": ord("@"),
        "caret": ord("^"),
        "pipe": ord("|"),
        "tilde": ord("~"),
    }
)

# These are just vexing!  Without removing them, identifiers containing
# "_and_", "_or_" or "_not_" would be rewritten by qstr_unescape().
# pop() with a default keeps this idempotent.
name2codepoint.pop("and", None)
name2codepoint.pop("or", None)
name2codepoint.pop("not", None)
|
2021-03-15 19:27:36 +05:30
|
|
|
|
2020-09-22 17:38:58 -05:00
|
|
|
|
2020-10-08 16:40:17 +02:00
|
|
|
def preprocess():
    """Run the preprocessor over the selected sources and save the combined output.

    Everything is read from the module-level ``args`` object:

    * Source selection: if any entry of ``args.changed_sources`` is also in
      ``args.dependencies``, all of ``args.sources`` are processed; otherwise
      ``args.changed_sources`` is used when it has a truthy entry, and
      ``args.sources`` when it does not.
    * Files ending in ``.cpp`` are run with ``args.cxxflags``, files ending in
      ``.c`` with ``args.cflags``; any other file is ignored.
    * The command line for each batch is ``args.pp + flags + files``.
    * Output is written, C batches first and then C++ batches, to
      ``args.output[0]`` (its parent directory is created if possible).

    NOTE(review): the ``__main__`` block visible in this file does not set
    ``pp``, ``cflags``, ``cxxflags``, ``sources``, ``changed_sources``,
    ``dependencies`` or ``output`` on ``args`` -- presumably another entry
    point does; confirm before calling.

    Raises:
        subprocess.CalledProcessError: if a preprocessor invocation exits
            with a non-zero status.
    """
    if any(src in args.dependencies for src in args.changed_sources):
        sources = args.sources
    elif any(args.changed_sources):
        sources = args.changed_sources
    else:
        sources = args.sources

    csources = []
    cxxsources = []
    for source in sources:
        if source.endswith(".cpp"):
            cxxsources.append(source)
        elif source.endswith(".c"):
            csources.append(source)

    # Best effort: the directory may already exist (or the path may have no
    # directory part); a real problem surfaces when the file is opened below.
    try:
        os.makedirs(os.path.dirname(args.output[0]))
    except OSError:
        pass

    def pp(flags):
        # Bind one flag set and return a worker that preprocesses a batch.
        def run(files):
            return subprocess.check_output(args.pp + flags + files)

        return run

    try:
        cpus = multiprocessing.cpu_count()
    except NotImplementedError:
        cpus = 1

    # multiprocessing.dummy gives a thread pool; the work happens in the
    # preprocessor child processes.
    pool = multiprocessing.dummy.Pool(cpus)
    try:
        with open(args.output[0], "wb") as out_file:
            for flags, group in (
                (args.cflags, csources),
                (args.cxxflags, cxxsources),
            ):
                # Ceiling division: spread the files over at most `cpus` batches.
                batch_size = (len(group) + cpus - 1) // cpus
                # `or 1` only avoids a zero range() step when `group` is empty.
                chunks = [
                    group[i : i + batch_size] for i in range(0, len(group), batch_size or 1)
                ]
                # imap() yields results in submission order.
                for output in pool.imap(pp(flags), chunks):
                    out_file.write(output)
    finally:
        # Release the worker threads on success and on failure alike.
        pool.terminate()
        pool.join()
|
2020-10-08 16:40:17 +02:00
|
|
|
|
|
|
|
|
2016-04-19 11:30:06 +03:00
|
|
|
def write_out(fname, output):
    """Write the entries collected for one source file to ``<output_dir>/<name>.qstr``.

    Nothing is written when ``output`` is empty.

    Args:
        fname: path of the source file the entries came from.  It is
            flattened into a single file name: ``/`` and ``\\`` become ``__``,
            ``:`` becomes ``@`` and ``..`` becomes ``@@``.
        output: list of text lines; they are joined with newlines and a final
            newline is appended.
    """
    if not output:
        return
    for m, r in [("/", "__"), ("\\", "__"), (":", "@"), ("..", "@@")]:
        fname = fname.replace(m, r)
    # The input is decoded as UTF-8 and the .qstr files are later re-read as
    # raw bytes, so encode explicitly instead of relying on the locale's
    # default encoding (which may be unable to represent translated text).
    with io.open(args.output_dir + "/" + fname + ".qstr", "w", encoding="utf-8") as f:
        f.write("\n".join(output) + "\n")
|
|
|
|
|
2021-03-15 19:27:36 +05:30
|
|
|
|
2016-09-19 13:00:44 -07:00
|
|
|
def qstr_unescape(qstr):
    """Expand ``_name_`` escape tokens in a qstr identifier into the characters they name.

    For every entry of ``name2codepoint`` (in that dict's iteration order),
    each occurrence of ``_<name>_`` in ``qstr`` is replaced by the character
    with that code point.  If ``__<name>__`` occurs anywhere in the string,
    that name is skipped entirely so dunder identifiers are left alone.

    Args:
        qstr: identifier text with the ``MP_QSTR_`` prefix already removed.

    Returns:
        The identifier with all applicable tokens replaced.
    """
    for name, codepoint in name2codepoint.items():
        if "__" + name + "__" in qstr:
            continue
        token = "_" + name + "_"
        # Only build the replacement when it is needed: on Python 2,
        # str(unichr(cp)) fails for code points outside ASCII.
        if token in qstr:
            qstr = qstr.replace(token, str(unichr(codepoint)))
    return qstr
|
|
|
|
|
2021-03-15 19:27:36 +05:30
|
|
|
|
2016-03-11 16:12:59 +00:00
|
|
|
def process_file(f):
    """Split preprocessor output into one qstr list per C/C++ source file.

    The input is scanned line by line:

    * A line marker (gcc style ``# n "file"`` or msvc style
      ``#line n "file"``) naming a ``.c`` or ``.cpp`` file different from the
      current one flushes the entries collected so far via ``write_out()``
      and makes that file current.  Markers for other files (e.g. headers)
      do not change the current file, so entries found after them are still
      attributed to the most recent ``.c``/``.cpp`` file.
    * On every other non-blank line, each ``MP_QSTR_xxx`` token not listed in
      ``QSTRING_BLOCK_LIST`` yields ``Q(<unescaped xxx>)`` and each
      ``translate("...")`` call yields ``TRANSLATE("...")``.

    Entries seen before the first ``.c``/``.cpp`` marker belong to no file
    and are discarded.

    Args:
        f: iterable of text lines (for example an open text file).

    Returns:
        Always the empty string.

    Raises:
        ValueError: if a line starts like a line marker but cannot be parsed.
    """
    re_line = re.compile(r"#[line]*\s(\d+)\s\"([^\"]+)\"")
    re_qstr = re.compile(r"MP_QSTR_[_a-zA-Z0-9]+")
    re_translate = re.compile(r"translate\(\"((?:(?=(\\?))\2.)*?)\"\)")
    output = []
    last_fname = None
    for line in f:
        if line.isspace():
            continue
        # match gcc-like output (# n "file") and msvc-like output (#line n "file")
        if line.startswith(("# ", "#line")):
            m = re_line.match(line)
            if m is None:
                # An explicit check: an assert would vanish under ``python -O``.
                raise ValueError("cannot parse preprocessor line marker: %r" % line)
            fname = m.group(2)
            if os.path.splitext(fname)[1] not in (".c", ".cpp"):
                continue
            if fname != last_fname:
                # There is nothing to flush to before the first source file;
                # write_out() cannot build a file name from None.
                if last_fname is not None:
                    write_out(last_fname, output)
                output = []
                last_fname = fname
            continue
        for match in re_qstr.findall(line):
            name = match.replace("MP_QSTR_", "")
            if name not in QSTRING_BLOCK_LIST:
                output.append("Q(" + qstr_unescape(name) + ")")
        # findall() returns one tuple per match (the pattern has two groups);
        # element 0 is the text between the quotes.
        for match in re_translate.findall(line):
            output.append('TRANSLATE("' + match[0] + '")')

    if last_fname:
        write_out(last_fname, output)
    return ""
|
|
|
|
|
|
|
|
|
|
|
|
def cat_together():
    """Merge every ``*.qstr`` file in ``args.output_dir`` into ``args.output_file``.

    All lines of all ``*.qstr`` files are read as bytes, sorted, joined and
    written to ``<output_dir>/out``.  An MD5 digest of that data is compared
    with the one stored in ``<output_file>.hash``; the staged file replaces
    ``args.output_file`` (and the hash file is rewritten) only when the digest
    differs or ``args.output_file`` does not exist.  Otherwise the existing
    output file is left untouched.

    Prints ``QSTR updated`` or ``QSTR not updated`` accordingly.
    """
    import glob
    import hashlib

    all_lines = []
    for fname in glob.glob(args.output_dir + "/*.qstr"):
        with open(fname, "rb") as f:
            all_lines += f.readlines()
    all_lines.sort()
    # readlines() keeps each line's own terminator, so joining with b"\n"
    # leaves a blank line between entries.  The format is kept as is so the
    # digest stays comparable with previously stored hashes.
    all_lines = b"\n".join(all_lines)

    staged = args.output_dir + "/out"
    with open(staged, "wb") as outf:
        outf.write(all_lines)

    new_hash = hashlib.md5(all_lines).hexdigest()
    old_hash = None
    try:
        with open(args.output_file + ".hash") as f:
            old_hash = f.read()
    except IOError:
        pass

    # Also regenerate when the output itself is missing: a matching hash file
    # alone does not mean the output file is still there.
    if old_hash != new_hash or not os.path.exists(args.output_file):
        print("QSTR updated")
        try:
            # rename below might fail if file exists
            os.remove(args.output_file)
        except OSError:
            pass
        os.rename(staged, args.output_file)
        with open(args.output_file + ".hash", "w") as f:
            f.write(new_hash)
    else:
        print("QSTR not updated")
|
2016-03-11 16:12:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Command-line entry point:
#   <script> split <preprocessed_file> <output_dir> <output_file>
#       split the preprocessor output into per-source .qstr files
#   <script> cat   <input_filename>    <output_dir> <output_file>
#       merge the .qstr files in output_dir into output_file
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(
            "usage: %s command input_filename output_dir output_file" % sys.argv[0],
            file=sys.stderr,
        )
        sys.exit(2)

    class Args:
        pass

    # `args` is deliberately a module-level name: the functions above read it.
    args = Args()
    args.command = sys.argv[1]
    args.input_filename = sys.argv[2]
    args.output_dir = sys.argv[3]
    args.output_file = sys.argv[4]

    try:
        os.makedirs(args.output_dir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # a regular file in the way, ...) is a real error.
        if not os.path.isdir(args.output_dir):
            raise

    if args.command == "split":
        with io.open(args.input_filename, encoding="utf-8") as infile:
            process_file(infile)
    elif args.command == "cat":
        cat_together()
    else:
        # Do not report success for a command that did nothing.
        print("%s: unknown command %r" % (sys.argv[0], args.command), file=sys.stderr)
        sys.exit(2)
|