build: Make genlast write the "split" files

This gives a further speedup of about 2s (12s -> 9.5s elapsed build time)
for stm32f405_feather.

For what are probably historical reasons, the qstr process involves
preprocessing a large number of source files into a single "qstr.i.last"
file, then reading that file back and splitting it into one ".qstr" file
for each original source ("*.c") file.
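For context, the split step relies on the '# <lineno> "<filename>"' markers
that the preprocessor emits, attributing each MP_QSTR_ occurrence to the
most recently named source file. A minimal sketch of that idea follows; it
is illustrative only, not the actual makeqstrdefs.py code, and
split_combined is a hypothetical helper name.

    import re
    from collections import defaultdict

    RE_LINE = re.compile(r'#[line]*\s+(\d+)\s+"([^"]+)"')
    RE_QSTR = re.compile(r"MP_QSTR_[_a-zA-Z0-9]+")

    def split_combined(qstr_i_last_path):
        # map each source file named in the preprocessor line markers to the
        # set of MP_QSTR_ tokens seen while that file was "current"
        per_file = defaultdict(set)
        current = "<unknown>"
        with open(qstr_i_last_path, encoding="utf-8", errors="ignore") as f:
            for line in f:
                marker = RE_LINE.match(line)
                if marker:
                    current = marker.group(2)
                per_file[current].update(RE_QSTR.findall(line))
        return per_file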

By eliminating the step that writes qstr.i.last and by parallelizing the
regular-expression matching across source files, build speed is further
improved.
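The parallel fan-out uses ProcessPoolExecutor.map(), which zips its
argument iterables, so the fixed preprocessor command and output directory
are passed with itertools.repeat() alongside the varying file list. A
stand-alone illustration of that pattern; the file names, command, and
output directory below are invented for the example.

    import itertools
    from concurrent.futures import ProcessPoolExecutor

    def describe(command, output_dir, fn):
        # stand-in for the real per-file work: one preprocessor run plus a
        # regex pass, writing <output_dir>/<mangled name>.qstr
        return "%s %s -> %s/%s.qstr" % (
            " ".join(command), fn, output_dir, fn.replace("/", "__"))

    if __name__ == "__main__":
        files = ["py/obj.c", "py/runtime.c"]
        with ProcessPoolExecutor() as pool:
            for line in pool.map(describe,
                                 itertools.repeat(["cc", "-E"]),
                                 itertools.repeat("build/genhdr/qstr"),
                                 files):
                print(line)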

Because the step that builds QSTR_DEFS_COLLECTED does not access
qstr.i.last, that path argument is replaced with "-" in the Makefile.
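The reason "-" is safe: the collection step only concatenates the per-file
*.qstr outputs under $(HEADER_BUILD)/qstr, so the qstr.i.last argument is
never opened. A rough sketch of such a concatenation; illustrative only,
not the actual makeqstrdefs.py, and cat_split_files is a hypothetical name.

    import glob
    import os

    def cat_split_files(output_dir, collected_path):
        # concatenate every per-file .qstr output, keeping one copy of each
        # Q(...)/TRANSLATE(...) line
        seen = set()
        with open(collected_path, "w") as out:
            for path in sorted(glob.glob(os.path.join(output_dir, "*.qstr"))):
                with open(path) as f:
                    for line in f:
                        if line not in seen:
                            seen.add(line)
                            out.write(line)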
Author: Jeff Epler
Date:   2020-10-11 16:03:46 -05:00
Commit: 479552ce56 (parent 607e4a905a)

2 changed files with 58 additions and 29 deletions

py/genlast.py

@@ -1,38 +1,71 @@
 #!/usr/bin/env python3
 
 import sys
-from concurrent.futures import ThreadPoolExecutor
+import re
+import os
+import itertools
+from concurrent.futures import ProcessPoolExecutor
 import multiprocessing
 import threading
 import subprocess
 
+from makeqstrdefs import qstr_unescape, QSTRING_BLACK_LIST
+
+re_line = re.compile(r"#[line]*\s(\d+)\s\"([^\"]+)\"", re.DOTALL)
+re_qstr = re.compile(r'MP_QSTR_[_a-zA-Z0-9]+', re.DOTALL)
+re_translate = re.compile(r'translate\(\"((?:(?=(\\?))\2.)*?)\"\)', re.DOTALL)
+
+def write_out(fname, output_dir, output):
+    if output:
+        for m, r in [("/", "__"), ("\\", "__"), (":", "@"), ("..", "@@")]:
+            fname = fname.replace(m, r)
+        with open(output_dir + "/" + fname + ".qstr", "w") as f:
+            f.write("\n".join(output) + "\n")
+
+def process_file(fname, output_dir, content):
+    content = content.decode('utf-8', errors='ignore')
+    output = []
+    for match in re_qstr.findall(content):
+        name = match.replace('MP_QSTR_', '')
+        if name not in QSTRING_BLACK_LIST:
+            output.append('Q(' + qstr_unescape(name) + ')')
+    for match in re_translate.findall(content):
+        output.append('TRANSLATE("' + match[0] + '")')
+
+    write_out(fname, output_dir, output)
+
 def checkoutput1(args):
     info = subprocess.run(args, check=True, stdout=subprocess.PIPE, input='')
     return info.stdout
 
-idx1 = sys.argv.index('--')
-idx2 = sys.argv.index('--', idx1+1)
-check = sys.argv[1:idx1]
-always = sys.argv[idx1+1:idx2]
-command = sys.argv[idx2+1:]
-
-output_lock = threading.Lock()
-def preprocess(fn):
-    output = checkoutput1(command + [fn])
-    # Ensure our output doesn't interleave with others
-    # a threading.Lock is not a context manager object :(
+def preprocess(command, output_dir, fn):
     try:
-        output_lock.acquire()
-        sys.stdout.buffer.write(output)
-    finally:
-        output_lock.release()
+        output = checkoutput1(command + [fn])
+        process_file(fn, output_dir, output)
+    except Exception as e:
+        print(e, file=sys.stderr)
 
-def maybe_preprocess(fn):
+def maybe_preprocess(command, output_dir, fn):
     if subprocess.call(["grep", "-lqE", "(MP_QSTR|translate)", fn]) == 0:
-        preprocess(fn)
+        preprocess(command, output_dir, fn)
 
-executor = ThreadPoolExecutor(max_workers=multiprocessing.cpu_count() + 1)
-executor.map(maybe_preprocess, check)
-executor.map(preprocess, always)
-executor.shutdown()
+if __name__ == '__main__':
+
+    idx1 = sys.argv.index('--')
+    idx2 = sys.argv.index('--', idx1+1)
+    output_dir = sys.argv[1]
+    check = sys.argv[2:idx1]
+    always = sys.argv[idx1+1:idx2]
+    command = sys.argv[idx2+1:]
+
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+
+    # Mac and Windows use 'spawn'. Uncomment this during testing to catch spawn-specific problems on Linux.
+    #multiprocessing.set_start_method("spawn")
+
+    executor = ProcessPoolExecutor(max_workers=multiprocessing.cpu_count() + 1)
+    executor.map(maybe_preprocess, itertools.repeat(command), itertools.repeat(output_dir), check)
+    executor.map(preprocess, itertools.repeat(command), itertools.repeat(output_dir), always)
+    executor.shutdown()
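Read together with the argument parsing above, the new command line puts
the output directory first, then the files to grep-check, "--", the files
to always preprocess, "--", and finally the preprocessor command. A
hypothetical argv, with all values invented for illustration:

    # mirrors the slicing in __main__ above
    argv = ["genlast.py", "build/genhdr/qstr",      # output_dir
            "py/obj.c", "py/runtime.c",             # check: grep'd first
            "--", "supervisor/shared/translate.c",  # always preprocessed
            "--", "cc", "-E", "-DNO_QSTR"]          # preprocessor command
    idx1 = argv.index("--")
    idx2 = argv.index("--", idx1 + 1)
    print(argv[1], argv[2:idx1], argv[idx1 + 1:idx2], argv[idx2 + 1:])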

py/mkrules.mk

@@ -76,18 +76,14 @@ $(OBJ): | $(HEADER_BUILD)/qstrdefs.enum.h $(HEADER_BUILD)/mpversion.h
 # - if anything in QSTR_GLOBAL_DEPENDENCIES is newer, then process all source files ($^)
 # - else, if list of newer prerequisites ($?) is not empty, then process just these ($?)
 # - else, process all source files ($^) [this covers "make -B" which can set $? to empty]
-$(HEADER_BUILD)/qstr.i.last: $(SRC_QSTR) $(SRC_QSTR_PREPROCESSOR) $(QSTR_GLOBAL_DEPENDENCIES) | $(HEADER_BUILD)/mpversion.h
+$(HEADER_BUILD)/qstr.split: $(SRC_QSTR) $(SRC_QSTR_PREPROCESSOR) $(QSTR_GLOBAL_DEPENDENCIES) | $(HEADER_BUILD)/mpversion.h $(PY_SRC)/genlast.py
 	$(STEPECHO) "GEN $@"
-	$(Q)$(PYTHON3) $(PY_SRC)/genlast.py $(if $(filter $?,$(QSTR_GLOBAL_DEPENDENCIES)),$^,$(if $?,$?,$^)) -- $(SRC_QSTR_PREPROCESSOR) -- $(CPP) $(QSTR_GEN_EXTRA_CFLAGS) $(CFLAGS) >$(HEADER_BUILD)/qstr.i.last;
-
-$(HEADER_BUILD)/qstr.split: $(HEADER_BUILD)/qstr.i.last $(PY_SRC)/makeqstrdefs.py
-	$(STEPECHO) "GEN $@"
-	$(Q)$(PYTHON3) $(PY_SRC)/makeqstrdefs.py split $(HEADER_BUILD)/qstr.i.last $(HEADER_BUILD)/qstr $(QSTR_DEFS_COLLECTED)
+	$(Q)$(PYTHON3) $(PY_SRC)/genlast.py $(HEADER_BUILD)/qstr $(if $(filter $?,$(QSTR_GLOBAL_DEPENDENCIES)),$^,$(if $?,$?,$^)) -- $(SRC_QSTR_PREPROCESSOR) -- $(CPP) $(QSTR_GEN_EXTRA_CFLAGS) $(CFLAGS)
 	$(Q)touch $@
 
 $(QSTR_DEFS_COLLECTED): $(HEADER_BUILD)/qstr.split $(PY_SRC)/makeqstrdefs.py
 	$(STEPECHO) "GEN $@"
-	$(Q)$(PYTHON3) $(PY_SRC)/makeqstrdefs.py cat $(HEADER_BUILD)/qstr.i.last $(HEADER_BUILD)/qstr $(QSTR_DEFS_COLLECTED)
+	$(Q)$(PYTHON3) $(PY_SRC)/makeqstrdefs.py cat - $(HEADER_BUILD)/qstr $(QSTR_DEFS_COLLECTED)
 
 # $(sort $(var)) removes duplicates
 #