makeqstrdata: Work around Python 3.6 compatibility problem
Discord user Folknology encountered a problem building with Python 3.6.9, `TypeError: ord() expected a character, but string of length 0 found`. I was able to reproduce the problem using Python 3.5*, and discovered that the meaning of the regular expression `"|."` had changed in 3.7. Before: ``` >>> [m.group(0) for m in re.finditer("|.", "hello")] ['', '', '', '', '', ''] ``` After: ``` >>> [m.group(0) for m in re.finditer("|.", "hello")] ['', 'h', '', 'e', '', 'l', '', 'l', '', 'o', ''] ``` Check if `words` is empty and, if so, use `"."` as the regular expression instead. This gives the same result on both versions: ``` ['h', 'e', 'l', 'l', 'o'] ``` and fixes the generation of the Huffman dictionary. Folknology verified that this fix worked for them. * I could easily install 3.5 but not 3.6. 3.5 reproduced the same problem.
This commit is contained in:
parent
8eda9174b2
commit
0318eb359f
@ -109,7 +109,11 @@ class TextSplitter:
|
|||||||
def __init__(self, words):
|
def __init__(self, words):
|
||||||
words.sort(key=lambda x: len(x), reverse=True)
|
words.sort(key=lambda x: len(x), reverse=True)
|
||||||
self.words = set(words)
|
self.words = set(words)
|
||||||
self.pat = re.compile("|".join(re.escape(w) for w in words) + "|.", flags=re.DOTALL)
|
if words:
|
||||||
|
pat = "|".join(re.escape(w) for w in words) + "|."
|
||||||
|
else:
|
||||||
|
pat = "."
|
||||||
|
self.pat = re.compile(pat, flags=re.DOTALL)
|
||||||
|
|
||||||
def iter_words(self, text):
|
def iter_words(self, text):
|
||||||
s = []
|
s = []
|
||||||
|
Loading…
Reference in New Issue
Block a user