tools: Add C middle-processor to make builtin tables proper hash tables.
This commit is contained in:
parent
521759ee18
commit
4bd95f8b44
262
tools/cc1
Executable file
262
tools/cc1
Executable file
@ -0,0 +1,262 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
This is a middle-processor for MicroPython source files. It takes the output
|
||||||
|
of the C preprocessor, has the option to change it, then feeds this into the
|
||||||
|
C compiler.
|
||||||
|
|
||||||
|
It currently has the ability to reorder static hash tables so they are actually
|
||||||
|
hashed, resulting in faster lookup times at runtime.
|
||||||
|
|
||||||
|
To use, configure the Python variables below, and add the following line to the
|
||||||
|
Makefile:
|
||||||
|
|
||||||
|
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# these are the configuration variables
|
||||||
|
# TODO somehow make them externally configurable
|
||||||
|
|
||||||
|
# this is the path to the true C compiler
|
||||||
|
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
|
||||||
|
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'
|
||||||
|
|
||||||
|
# this must be the same as MICROPY_QSTR_BYTES_IN_HASH
|
||||||
|
bytes_in_qstr_hash = 2
|
||||||
|
|
||||||
|
# this must be 1 or more (can be a decimal)
|
||||||
|
# larger uses more code size but yields faster lookups
|
||||||
|
table_size_mult = 1
|
||||||
|
|
||||||
|
# these control output during processing
|
||||||
|
print_stats = True
|
||||||
|
print_debug = False
|
||||||
|
|
||||||
|
# end configuration variables
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# precompile regexs
|
||||||
|
re_preproc_line = re.compile(r'# [0-9]+ ')
|
||||||
|
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')
|
||||||
|
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
|
||||||
|
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$')
|
||||||
|
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
|
||||||
|
|
||||||
|
# this must match the equivalent function in qstr.c
|
||||||
|
def compute_hash(qstr):
|
||||||
|
hash = 5381
|
||||||
|
for char in qstr:
|
||||||
|
hash = (hash * 33) ^ ord(char)
|
||||||
|
# Make sure that valid hash is never zero, zero means "hash not computed"
|
||||||
|
return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1
|
||||||
|
|
||||||
|
# this algo must match the equivalent in map.c
|
||||||
|
def hash_insert(map, key, value):
|
||||||
|
hash = compute_hash(key)
|
||||||
|
pos = hash % len(map)
|
||||||
|
start_pos = pos
|
||||||
|
if print_debug:
|
||||||
|
print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='')
|
||||||
|
while True:
|
||||||
|
if map[pos] is None:
|
||||||
|
# found empty slot, so key is not in table
|
||||||
|
if print_debug:
|
||||||
|
print('put at %u' % pos)
|
||||||
|
map[pos] = (key, value)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
# not yet found, keep searching
|
||||||
|
if map[pos][0] == key:
|
||||||
|
raise AssertionError("duplicate key '%s'" % (key,))
|
||||||
|
pos = (pos + 1) % len(map)
|
||||||
|
assert pos != start_pos
|
||||||
|
|
||||||
|
def hash_find(map, key):
|
||||||
|
hash = compute_hash(key)
|
||||||
|
pos = hash % len(map)
|
||||||
|
start_pos = pos
|
||||||
|
attempts = 0
|
||||||
|
while True:
|
||||||
|
attempts += 1
|
||||||
|
if map[pos] is None:
|
||||||
|
return attempts, None
|
||||||
|
elif map[pos][0] == key:
|
||||||
|
return attempts, map[pos][1]
|
||||||
|
else:
|
||||||
|
pos = (pos + 1) % len(map)
|
||||||
|
if pos == start_pos:
|
||||||
|
return attempts, None
|
||||||
|
|
||||||
|
def process_map_table(file, line, output):
|
||||||
|
output.append(line)
|
||||||
|
|
||||||
|
# consume all lines that are entries of the table and concat them
|
||||||
|
# (we do it this way because there can be multiple entries on one line)
|
||||||
|
table_contents = []
|
||||||
|
while True:
|
||||||
|
line = file.readline()
|
||||||
|
if len(line) == 0:
|
||||||
|
print('unexpected end of input')
|
||||||
|
sys.exit(1)
|
||||||
|
line = line.strip()
|
||||||
|
if len(line) == 0:
|
||||||
|
# empty line
|
||||||
|
continue
|
||||||
|
if re_preproc_line.match(line):
|
||||||
|
# preprocessor line number comment
|
||||||
|
continue
|
||||||
|
if line == '};':
|
||||||
|
# end of table (we assume it appears on a single line)
|
||||||
|
break
|
||||||
|
table_contents.append(line)
|
||||||
|
|
||||||
|
# make combined string of entries
|
||||||
|
entries_str = ''.join(table_contents)
|
||||||
|
|
||||||
|
# split into individual entries
|
||||||
|
entries = []
|
||||||
|
while entries_str:
|
||||||
|
# look for single entry, by matching nested braces
|
||||||
|
match = None
|
||||||
|
if entries_str[0] == '{':
|
||||||
|
nested_braces = 0
|
||||||
|
for i in range(len(entries_str)):
|
||||||
|
if entries_str[i] == '{':
|
||||||
|
nested_braces += 1
|
||||||
|
elif entries_str[i] == '}':
|
||||||
|
nested_braces -= 1
|
||||||
|
if nested_braces == 0:
|
||||||
|
match = re_map_entry.match(entries_str[:i + 2])
|
||||||
|
break
|
||||||
|
|
||||||
|
if not match:
|
||||||
|
print('unknown line in table:', entries_str)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# extract single entry
|
||||||
|
line = match.group(0)
|
||||||
|
qstr = match.group(1)
|
||||||
|
entries_str = entries_str[len(line):].lstrip()
|
||||||
|
|
||||||
|
# add the qstr and the whole line to list of all entries
|
||||||
|
entries.append((qstr, line))
|
||||||
|
|
||||||
|
# sort entries so hash table construction is deterministic
|
||||||
|
entries.sort()
|
||||||
|
|
||||||
|
# create hash table
|
||||||
|
map = [None] * int(len(entries) * table_size_mult)
|
||||||
|
for qstr, line in entries:
|
||||||
|
# We assume that qstr does not have any escape sequences in it.
|
||||||
|
# This is reasonably safe, since keys in a module or class dict
|
||||||
|
# should be standard identifiers.
|
||||||
|
# TODO verify this and raise an error if escape sequence found
|
||||||
|
hash_insert(map, qstr, line)
|
||||||
|
|
||||||
|
# compute statistics
|
||||||
|
total_attempts = 0
|
||||||
|
for qstr, _ in entries:
|
||||||
|
attempts, line = hash_find(map, qstr)
|
||||||
|
assert line is not None
|
||||||
|
if print_debug:
|
||||||
|
print(' %s lookup took %u attempts' % (qstr, attempts))
|
||||||
|
total_attempts += attempts
|
||||||
|
if len(entries):
|
||||||
|
stats = len(map), len(entries) / len(map), total_attempts / len(entries)
|
||||||
|
else:
|
||||||
|
stats = 0, 0, 0
|
||||||
|
if print_debug:
|
||||||
|
print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)
|
||||||
|
|
||||||
|
# output hash table
|
||||||
|
for row in map:
|
||||||
|
if row is None:
|
||||||
|
output.append('{ 0, 0 },\n')
|
||||||
|
else:
|
||||||
|
output.append(row[1] + '\n')
|
||||||
|
output.append('};\n')
|
||||||
|
|
||||||
|
# skip to next non-blank line
|
||||||
|
while True:
|
||||||
|
line = file.readline()
|
||||||
|
if len(line) == 0:
|
||||||
|
print('unexpected end of input')
|
||||||
|
sys.exit(1)
|
||||||
|
line = line.strip()
|
||||||
|
if len(line) == 0:
|
||||||
|
continue
|
||||||
|
break
|
||||||
|
|
||||||
|
# transform the is_ordered param from 1 to 0
|
||||||
|
match = re_mp_obj_dict_t.match(line)
|
||||||
|
if match is None:
|
||||||
|
match = re_mp_map_t.match(line)
|
||||||
|
if match is None:
|
||||||
|
print('expecting mp_obj_dict_t or mp_map_t definition')
|
||||||
|
print(output[0])
|
||||||
|
print(line)
|
||||||
|
sys.exit(1)
|
||||||
|
line = match.group('head') + '0' + match.group('tail') + '\n'
|
||||||
|
output.append(line)
|
||||||
|
|
||||||
|
return (match.group('id'),) + stats
|
||||||
|
|
||||||
|
def process_file(filename):
|
||||||
|
output = []
|
||||||
|
file_changed = False
|
||||||
|
with open(filename, 'rt') as f:
|
||||||
|
while True:
|
||||||
|
line = f.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
if re_mp_rom_map_elem_t.match(line):
|
||||||
|
file_changed = True
|
||||||
|
stats = process_map_table(f, line, output)
|
||||||
|
if print_stats:
|
||||||
|
print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats)
|
||||||
|
else:
|
||||||
|
output.append(line)
|
||||||
|
|
||||||
|
if file_changed:
|
||||||
|
if print_debug:
|
||||||
|
print(' modifying static maps in', output[0].strip())
|
||||||
|
with open(filename, 'wt') as f:
|
||||||
|
for line in output:
|
||||||
|
f.write(line)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# run actual C compiler
|
||||||
|
# need to quote args that have special characters in them
|
||||||
|
def quote(s):
|
||||||
|
if s.find('<') != -1 or s.find('>') != -1:
|
||||||
|
return "'" + s + "'"
|
||||||
|
else:
|
||||||
|
return s
|
||||||
|
ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:]))
|
||||||
|
if ret != 0:
|
||||||
|
ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero
|
||||||
|
sys.exit(ret)
|
||||||
|
|
||||||
|
if sys.argv[1] == '-E':
|
||||||
|
# CPP has been run, now do our processing stage
|
||||||
|
for i, arg in enumerate(sys.argv):
|
||||||
|
if arg == '-o':
|
||||||
|
return process_file(sys.argv[i + 1])
|
||||||
|
|
||||||
|
print('%s: could not find "-o" option' % (sys.argv[0],))
|
||||||
|
sys.exit(1)
|
||||||
|
elif sys.argv[1] == '-fpreprocessed':
|
||||||
|
# compiler has been run, nothing more to do
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
# unknown processing stage
|
||||||
|
print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1]))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Loading…
Reference in New Issue
Block a user