tools: Add C middle-processor to make builtin tables proper hash tables.
This commit is contained in:
parent
521759ee18
commit
4bd95f8b44
262
tools/cc1
Executable file
262
tools/cc1
Executable file
@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This is a middle-processor for MicroPython source files. It takes the output
|
||||
of the C preprocessor, has the option to change it, then feeds this into the
|
||||
C compiler.
|
||||
|
||||
It currently has the ability to reorder static hash tables so they are actually
|
||||
hashed, resulting in faster lookup times at runtime.
|
||||
|
||||
To use, configure the Python variables below, and add the following line to the
|
||||
Makefile:
|
||||
|
||||
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
################################################################################
# Configuration variables -- edit these to suit the build.
# TODO somehow make them externally configurable

# Path to the true C compiler that this script invokes.
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1'
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1'

# Width of a qstr hash in bytes; this must be the same as
# MICROPY_QSTR_BYTES_IN_HASH.
bytes_in_qstr_hash = 2

# Multiplier applied to the number of entries to get the hash table size.
# It must be 1 or more (can be a decimal); larger uses more code size but
# yields faster lookups.
table_size_mult = 1

# These control output during processing.
print_stats = True
print_debug = False

# End of configuration variables.
################################################################################
|
||||
|
||||
# Precompiled regexes, built once at import time.

# Preprocessor line-number comment.
re_preproc_line = re.compile(r'# [0-9]+ ')

# One "{ ... (MP_QSTR_xxx) ... }," table entry; group 1 is the qstr name.
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},')

# mp_obj_dict_t definition with is_ordered = 1.  'head' is everything up to the
# is_ordered value, 'tail' everything after it, and 'id' the variable name.
re_mp_obj_dict_t = re.compile(
    r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = '
    r'\{ \.base = \{&mp_type_dict\}, \.map = '
    r'\{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )'
    r'1(?P<tail>, \.used = .+ };)$'
)

# Bare mp_map_t definition with is_ordered = 1; same named groups as above.
re_mp_map_t = re.compile(
    r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = '
    r'\{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )'
    r'1(?P<tail>, \.used = .+ };)$'
)

# Opening line of a static mp_rom_map_elem_t table.
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$')
|
||||
|
||||
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
    """Return the truncated hash of the string *qstr*.

    Starting from 5381, each character folds in as ``(acc * 33) ^ ord(char)``.
    The result is then cut down to ``bytes_in_qstr_hash`` bytes.

    Returns:
        A non-zero integer; a truncated value of zero is replaced by 1.
    """
    acc = 5381
    for code in (ord(char) for char in qstr):
        acc = (acc * 33) ^ code
    mask = (1 << (8 * bytes_in_qstr_hash)) - 1
    truncated = acc & mask
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return truncated if truncated else 1
|
||||
|
||||
# this algo must match the equivalent in map.c
def hash_insert(map, key, value):
    """Store ``(key, value)`` in the open-addressed table *map*.

    Probing starts at ``compute_hash(key) % len(map)`` and moves forward one
    slot at a time, wrapping around, until a free (``None``) slot is found.

    Args:
        map: list used as the hash table; free slots hold ``None`` and used
            slots hold ``(key, value)`` tuples.  Modified in place.
        key: qstr name to hash and store.
        value: payload stored next to the key.

    Raises:
        AssertionError: if *key* is already in the table, or if every slot
            has been probed without finding a free one.
    """
    size = len(map)
    start_pos = compute_hash(key) % size
    pos = start_pos
    if print_debug:
        print(' insert %s: start at %u/%u -- ' % (key, pos, size), end='')
    while map[pos] is not None:
        # slot is taken: reject duplicates, otherwise keep searching
        if map[pos][0] == key:
            raise AssertionError("duplicate key '%s'" % (key,))
        pos = (pos + 1) % size
        if pos == start_pos:
            # Raised explicitly instead of using `assert`, so the check is
            # not stripped under `python -O`, which would loop forever here.
            raise AssertionError('hash table is full')
    # found empty slot, so key is not in table
    if print_debug:
        print('put at %u' % pos)
    map[pos] = (key, value)
|
||||
|
||||
def hash_find(map, key):
    """Look *key* up in the open-addressed table *map*.

    Probing starts at ``compute_hash(key) % len(map)`` and walks forward with
    wrap-around.  It stops at the first free slot, at a slot holding *key*,
    or once it is back at the starting slot.

    Returns:
        ``(attempts, value)`` when the key is found, else ``(attempts, None)``;
        ``attempts`` is the number of slots inspected.
    """
    size = len(map)
    first_slot = compute_hash(key) % size
    slot = first_slot
    probes = 0
    while True:
        probes += 1
        entry = map[slot]
        if entry is None:
            # a free slot ends the probe sequence: key is absent
            return probes, None
        if entry[0] == key:
            return probes, entry[1]
        slot = (slot + 1) % size
        if slot == first_slot:
            # wrapped all the way round without finding the key
            return probes, None
|
||||
|
||||
def _read_stripped_line(file):
    """Return the next line of *file*, stripped; exit(1) at end of input."""
    raw = file.readline()
    if not raw:
        print('unexpected end of input')
        sys.exit(1)
    return raw.strip()


def _read_table_body(file):
    """Consume lines up to the closing '};' and return the entries as one string."""
    # (we do it this way because there can be multiple entries on one line)
    parts = []
    while True:
        line = _read_stripped_line(file)
        if not line or re_preproc_line.match(line):
            # empty line, or preprocessor line number comment
            continue
        if line == '};':
            # end of table (we assume it appears on a single line)
            return ''.join(parts)
        parts.append(line)


def _split_table_entries(entries_str):
    """Split the concatenated table body into a list of (qstr, entry_text)."""
    entries = []
    while entries_str:
        # look for single entry, by matching nested braces
        match = None
        if entries_str[0] == '{':
            depth = 0
            for i, char in enumerate(entries_str):
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        # i + 2 so the slice includes the char after the brace
                        match = re_map_entry.match(entries_str[:i + 2])
                        break

        if not match:
            print('unknown line in table:', entries_str)
            sys.exit(1)

        # extract single entry, keeping the qstr and the whole entry text
        entry_text = match.group(0)
        entries.append((match.group(1), entry_text))
        entries_str = entries_str[len(entry_text):].lstrip()
    return entries


def _lookup_stats(table, entries):
    """Look every key up again and return (size, load, avg_lookups)."""
    total_attempts = 0
    for qstr, _ in entries:
        attempts, found = hash_find(table, qstr)
        assert found is not None
        if print_debug:
            print(' %s lookup took %u attempts' % (qstr, attempts))
        total_attempts += attempts
    if not entries:
        return 0, 0, 0
    return len(table), len(entries) / len(table), total_attempts / len(entries)


def process_map_table(file, line, output):
    """Rewrite one static map table so that its rows are in hash order.

    The table rows are read from *file*, sorted, placed into a hash table with
    ``hash_insert`` and written to *output* in slot order, with ``{ 0, 0 },``
    for unused slots.  The next non-blank line must be the matching
    mp_obj_dict_t or mp_map_t definition; it is emitted with its
    ``is_ordered`` value changed from 1 to 0.

    Args:
        file: object whose ``readline()`` yields the lines following the
            table's opening line.
        line: the opening line of the table; appended to *output* unchanged.
        output: list of output lines, appended to in place.

    Returns:
        Tuple ``(id, size, load, avg_lookups)`` where ``id`` is the name
        captured from the dict/map definition.

    Exits the process with status 1 on end of input, on an unrecognised table
    entry, or if the line after the table is not a recognised definition.
    """
    output.append(line)

    entries = _split_table_entries(_read_table_body(file))

    # sort entries so hash table construction is deterministic
    entries.sort()

    # create hash table
    table = [None] * int(len(entries) * table_size_mult)
    for qstr, entry_text in entries:
        # We assume that qstr does not have any escape sequences in it.
        # This is reasonably safe, since keys in a module or class dict
        # should be standard identifiers.
        # TODO verify this and raise an error if escape sequence found
        hash_insert(table, qstr, entry_text)

    # compute statistics
    stats = _lookup_stats(table, entries)
    if print_debug:
        print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats)

    # output hash table
    output.extend('{ 0, 0 },\n' if row is None else row[1] + '\n' for row in table)
    output.append('};\n')

    # skip to next non-blank line
    line = ''
    while not line:
        line = _read_stripped_line(file)

    # transform the is_ordered param from 1 to 0
    match = re_mp_obj_dict_t.match(line) or re_mp_map_t.match(line)
    if match is None:
        print('expecting mp_obj_dict_t or mp_map_t definition')
        print(output[0])
        print(line)
        sys.exit(1)
    output.append(match.group('head') + '0' + match.group('tail') + '\n')

    return (match.group('id'),) + stats
|
||||
|
||||
def process_file(filename):
    """Re-hash every static map table found in *filename*, rewriting it in place.

    Lines are copied through unchanged except for tables whose opening line
    matches ``re_mp_rom_map_elem_t``; those are handed to
    ``process_map_table``.  The file is only written back if at least one
    table was found.
    """
    rewritten = []
    found_table = False
    with open(filename, 'rt') as src:
        # readline() is used because process_map_table() reads further lines
        # from this same file object; '' marks end of file
        for current in iter(src.readline, ''):
            if not re_mp_rom_map_elem_t.match(current):
                rewritten.append(current)
                continue
            found_table = True
            table_stats = process_map_table(src, current, rewritten)
            if print_stats:
                print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % table_stats)

    if not found_table:
        return

    if print_debug:
        print(' modifying static maps in', rewritten[0].strip())
    with open(filename, 'wt') as dst:
        dst.writelines(rewritten)
|
||||
|
||||
def main():
    """Run the real C compiler, then post-process its output when needed.

    All command-line arguments are forwarded to ``cc1_path``.  If that fails,
    the script exits with a non-zero status in the range 1-127.  Otherwise
    the first argument selects what happens next:

    * ``-E``: CPP has been run, so the file named after ``-o`` is handed to
      ``process_file``.
    * ``-fpreprocessed``: the compiler has been run, nothing more to do.
    * anything else: reported as an unknown processing stage, exit status 1.
    """
    # Local import so this function brings its own dependency into scope.
    import subprocess

    if len(sys.argv) < 2:
        print('%s: missing first option' % (sys.argv[0],))
        sys.exit(1)

    # run actual C compiler
    # The arguments go to the compiler as a list, with no shell in between,
    # so ones containing spaces, quotes, '<', '>' etc. need no quoting.
    try:
        ret = subprocess.call([cc1_path] + sys.argv[1:])
    except OSError as exc:
        print('%s: could not run "%s": %s' % (sys.argv[0], cc1_path, exc))
        sys.exit(127)
    if ret != 0:
        ret = (ret & 0x7f) or 127  # make it in range 0-127, but non-zero
        sys.exit(ret)

    stage = sys.argv[1]
    if stage == '-E':
        # CPP has been run, now do our processing stage
        # (the last argument is skipped: a trailing "-o" names no file)
        for i, arg in enumerate(sys.argv[:-1]):
            if arg == '-o':
                return process_file(sys.argv[i + 1])

        print('%s: could not find "-o" option' % (sys.argv[0],))
        sys.exit(1)
    elif stage == '-fpreprocessed':
        # compiler has been run, nothing more to do
        return
    else:
        # unknown processing stage
        print('%s: unknown first option "%s"' % (sys.argv[0], stage))
        sys.exit(1)
|
||||
|
||||
# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user