From 4f27337207a74351a4d85016428df815896c0efa Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Thu, 2 Jun 2022 21:24:56 +0200 Subject: [PATCH] Only check the savings if a word occurs at least twice Profiling shows that `est_net_savings` is one of the highest costs of the whole process. Approximately, you can save storage only if a word appears more than once, and doing this greatly reduces the number of `est_net_savings` calls. Locally, it reduces the time for this specific build step by 50% on ports/unix coverage build, without affecting the size of the generated binary. --- py/makeqstrdata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 74ad78c47d..7222ded805 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -400,7 +400,8 @@ def compute_huffman_coding(translations, compression_filename): # words[] array. scores = sorted( - ((s, -est_net_savings(s, occ)) for (s, occ) in counter.items()), key=lambda x: x[1] + ((s, -est_net_savings(s, occ)) for (s, occ) in counter.items() if occ > 1), + key=lambda x: x[1], ) # Pick the one with the highest score. The score must be negative.