circuitpython/py/genlast.py
Jeff Epler 607e4a905a build: parallelize the creation of qstr.i.last
Rather than simply invoking gcc in preprocessor mode with a list of files, use
a Python script with the (python3) ThreadPoolExecutor to invoke the
preprocessor in parallel.

The amount of concurrency is derived from the number of system CPUs (one worker
more than the CPU count), not from the makefile "-j" parallelism setting,
because there is no simple, correct way for a Python program to cooperate with
make's idea of parallelism.

This reduces the build time of stm32f405 feather (a non-LTO build) from 16s to
12s on my 16-thread Ryzen machine.
2020-10-11 20:19:59 -05:00

39 lines
1.0 KiB
Python

#!/usr/bin/env python3
import sys
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import threading
import subprocess
def checkoutput1(args):
    """Run *args* as a subprocess and return what it wrote to stdout.

    The child is handed empty standard input, so it cannot block waiting
    for data from the terminal.

    Args:
        args: The command and its arguments, as a list.

    Returns:
        The captured standard output as bytes.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
    """
    return subprocess.check_output(args, input='')
# Command line layout:
#   genlast.py <check files...> -- <always files...> -- <preprocessor command...>
# A missing separator makes list.index() raise ValueError.
separator = '--'
first_sep = sys.argv.index(separator)
second_sep = sys.argv.index(separator, first_sep + 1)

# Everything between the script name and the first separator.
check = sys.argv[1:first_sep]
# Everything between the two separators.
always = sys.argv[first_sep + 1:second_sep]
# Everything after the second separator.
command = sys.argv[second_sep + 1:]

# Shared lock used to serialize writes to stdout.
output_lock = threading.Lock()
def preprocess(fn):
    """Preprocess one file and write the result to standard output.

    Runs the module-level ``command`` with *fn* appended, then writes the
    captured bytes to ``sys.stdout.buffer`` while holding ``output_lock``.

    Args:
        fn: Name of the file to append to the preprocessor command line.

    Raises:
        subprocess.CalledProcessError: If the preprocessor command exits
            with a non-zero status.
    """
    # Run the subprocess outside the lock so that only the write is serialized.
    output = checkoutput1(command + [fn])
    # Ensure our output doesn't interleave with others. threading.Lock is a
    # context manager: the lock is released even if the write raises, and
    # release is never attempted when acquiring was itself interrupted.
    with output_lock:
        sys.stdout.buffer.write(output)
def maybe_preprocess(fn):
    """Preprocess *fn* only if grep finds a marker pattern in it.

    ``grep`` is run on the file with the extended regular expression
    ``(MP_QSTR|translate)``; an exit status of 0 means the file matched.
    Files that do not match (or that grep fails on) are skipped.

    Args:
        fn: Name of the file to scan and, on a match, preprocess.
    """
    grep_status = subprocess.call(["grep", "-lqE", "(MP_QSTR|translate)", fn])
    if grep_status != 0:
        # No match: nothing to contribute for this file.
        return
    preprocess(fn)
# Run all jobs on one thread pool with one worker more than the CPU count.
# Leaving the "with" block waits for outstanding work, like executor.shutdown().
with ThreadPoolExecutor(max_workers=multiprocessing.cpu_count() + 1) as executor:
    # Queue both batches before waiting on either so they run concurrently.
    pending = [
        executor.map(maybe_preprocess, check),
        executor.map(preprocess, always),
    ]
    # executor.map() re-raises a worker's exception only when its result is
    # retrieved from the returned iterator. Drain both iterators so that a
    # failed preprocessor run aborts the script with an error instead of
    # silently leaving that file's output out.
    for results in pending:
        for _ in results:
            pass