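py: Support Unicode (UTF-8 encoded) identifiers in Python source.
Enabled simply by making the identifier lexing code 8-bit clean: any raw byte with the high bit set is accepted as part of an identifier, and identifier bytes are appended to the token buffer unchanged.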
This commit is contained in:
parent
6e56bb623c
commit
7ed58cb663
11
py/lexer.c
11
py/lexer.c
@ -112,12 +112,11 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
|
|||||||
return lex->chr1 >= '0' && lex->chr1 <= '7';
|
return lex->chr1 >= '0' && lex->chr1 <= '7';
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO UNICODE include unicode characters in definition of identifiers
|
// to easily parse utf-8 identifiers we allow any raw byte with high bit set
|
||||||
STATIC bool is_head_of_identifier(mp_lexer_t *lex) {
|
STATIC bool is_head_of_identifier(mp_lexer_t *lex) {
|
||||||
return is_letter(lex) || lex->chr0 == '_';
|
return is_letter(lex) || lex->chr0 == '_' || lex->chr0 >= 0x80;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO UNICODE include unicode characters in definition of identifiers
|
|
||||||
STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
|
STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
|
||||||
return is_head_of_identifier(lex) || is_digit(lex);
|
return is_head_of_identifier(lex) || is_digit(lex);
|
||||||
}
|
}
|
||||||
@ -523,13 +522,13 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
|
|||||||
} else if (is_head_of_identifier(lex)) {
|
} else if (is_head_of_identifier(lex)) {
|
||||||
lex->tok_kind = MP_TOKEN_NAME;
|
lex->tok_kind = MP_TOKEN_NAME;
|
||||||
|
|
||||||
// get first char
|
// get first char (add as byte to remain 8-bit clean and support utf-8)
|
||||||
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
|
|
||||||
// get tail chars
|
// get tail chars
|
||||||
while (!is_end(lex) && is_tail_of_identifier(lex)) {
|
while (!is_end(lex) && is_tail_of_identifier(lex)) {
|
||||||
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
27
tests/unicode/unicode_id.py
Normal file
27
tests/unicode/unicode_id.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# test unicode in identifiers
|
||||||
|
|
||||||
|
# comment
|
||||||
|
# αβγδϵφζ
|
||||||
|
|
||||||
|
# global identifiers
|
||||||
|
α = 1
|
||||||
|
αβγ = 2
|
||||||
|
bβ = 3
|
||||||
|
βb = 4
|
||||||
|
print(α, αβγ, bβ, βb)
|
||||||
|
|
||||||
|
# function, argument, local identifiers
|
||||||
|
def α(β, γ):
|
||||||
|
δ = β + γ
|
||||||
|
print(β, γ, δ)
|
||||||
|
α(1, 2)
|
||||||
|
|
||||||
|
# class, method identifiers
|
||||||
|
class φ:
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
def δ(self, ϵ):
|
||||||
|
print(ϵ)
|
||||||
|
zζzζz = φ()
|
||||||
|
if hasattr(zζzζz, "δ"):
|
||||||
|
zζzζz.δ(ϵ=123)
|
Loading…
Reference in New Issue
Block a user