From 8836198ff1d03254f8630703e0c8c941c0f733b7 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Wed, 3 Feb 2021 17:18:47 -0600
Subject: [PATCH] TextSplitter: don't mutate 'words'

I was puzzled by why the dictionary words were sorted by length. It
was because TextSplitter sorted its parameter, instead of a copy.

This doesn't affect encoding size, but does affect the encoding
NUMBER of the found words. We'll deliberately restore sorting by
length next, for other reasons, but not by spooky action.
---
 py/makeqstrdata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index dc004d2df3..bbd298e931 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -280,7 +280,7 @@ def translate(translation_file, i18ns):
 
 class TextSplitter:
     def __init__(self, words):
-        words.sort(key=lambda x: len(x), reverse=True)
+        words = sorted(words, key=lambda x: len(x), reverse=True)
         self.words = set(words)
         if words:
             pat = "|".join(re.escape(w) for w in words) + "|."