From 0318eb359fbeb91c6b37ed2050e57711ec2740bc Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Mon, 21 Sep 2020 10:02:27 -0500
Subject: [PATCH] makeqstrdata: Work around python3.6 compatibility problem

Discord user Folknology encountered a problem building with Python 3.6.9,
`TypeError: ord() expected a character, but string of length 0 found`.

I was able to reproduce the problem using Python3.5*, and discovered that
the meaning of the regular expression `"|."` had changed in 3.7. Before,
```
>>> [m.group(0) for m in re.finditer("|.", "hello")]
['', '', '', '', '', '']
```
After:
```
>>> [m.group(0) for m in re.finditer("|.", "hello")]
['', 'h', '', 'e', '', 'l', '', 'l', '', 'o', '']
```
Check if `words` is empty and if so use `"."` as the regular expression
instead. This gives the same result on both versions:
```
['h', 'e', 'l', 'l', 'o']
```
and fixes the generation of the huffman dictionary.

Folknology verified that this fix worked for them.

* I could easily install 3.5 but not 3.6. 3.5 reproduced the same problem
---
 py/makeqstrdata.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index 96e3956b42..b4f4f1b035 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -109,7 +109,11 @@ class TextSplitter:
     def __init__(self, words):
         words.sort(key=lambda x: len(x), reverse=True)
         self.words = set(words)
-        self.pat = re.compile("|".join(re.escape(w) for w in words) + "|.", flags=re.DOTALL)
+        if words:
+            pat = "|".join(re.escape(w) for w in words) + "|."
+        else:
+            pat = "."
+        self.pat = re.compile(pat, flags=re.DOTALL)
 
     def iter_words(self, text):
         s = []