diff --git a/py/lexer.c b/py/lexer.c index 6a3fa656b1..5c942f9344 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -25,6 +25,7 @@ */ #include <stdio.h> +#include <string.h> #include <assert.h> #include "py/mpstate.h" @@ -39,19 +40,6 @@ // TODO seems that CPython allows NULL byte in the input stream // don't know if that's intentional or not, but we don't allow it -// TODO replace with a call to a standard function -STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) { - mp_uint_t i = 0; - - while (i < len && *str == *strn) { - ++i; - ++str; - ++strn; - } - - return i == len && *str == 0; -} - #define MP_LEXER_EOF ((unichar)MP_READER_EOF) #define CUR_CHAR(lex) ((lex)->chr0) @@ -225,10 +213,12 @@ STATIC const uint8_t tok_enc_kind[] = { }; // must have the same order as enum in lexer.h +// must be sorted according to strcmp STATIC const char *const tok_kw[] = { "False", "None", "True", + "__debug__", "and", "as", "assert", @@ -263,7 +253,6 @@ STATIC const char *const tok_kw[] = { "while", "with", "yield", - "__debug__", }; // This is called with CUR_CHAR() before first hex digit, and should return with @@ -531,16 +520,18 @@ void mp_lexer_to_next(mp_lexer_t *lex) { // We also check for __debug__ here and convert it to its value. This is // so the parser gives a syntax error on, eg, x.__debug__. Otherwise, we // need to check for this special token in many places in the compiler. - // TODO improve speed of these string comparisons + const char *s = vstr_null_terminated_str(&lex->vstr); for (size_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) { - if (str_strn_equal(tok_kw[i], lex->vstr.buf, lex->vstr.len)) { - if (i == MP_ARRAY_SIZE(tok_kw) - 1) { - // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__" + int cmp = strcmp(s, tok_kw[i]); + if (cmp == 0) { + lex->tok_kind = MP_TOKEN_KW_FALSE + i; + if (lex->tok_kind == MP_TOKEN_KW___DEBUG__) { lex->tok_kind = (MP_STATE_VM(mp_optimise_value) == 0 ? 
MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE); - } else { - lex->tok_kind = MP_TOKEN_KW_FALSE + i; } break; + } else if (cmp < 0) { + // Table is sorted and comparison was less-than, so stop searching + break; } } diff --git a/py/lexer.h b/py/lexer.h index 32aef96266..d407192856 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -61,6 +61,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_FALSE, // 14 MP_TOKEN_KW_NONE, MP_TOKEN_KW_TRUE, + MP_TOKEN_KW___DEBUG__, MP_TOKEN_KW_AND, MP_TOKEN_KW_AS, MP_TOKEN_KW_ASSERT, @@ -71,7 +72,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_BREAK, MP_TOKEN_KW_CLASS, MP_TOKEN_KW_CONTINUE, - MP_TOKEN_KW_DEF, // 23 + MP_TOKEN_KW_DEF, MP_TOKEN_KW_DEL, MP_TOKEN_KW_ELIF, MP_TOKEN_KW_ELSE, @@ -81,7 +82,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_FROM, MP_TOKEN_KW_GLOBAL, MP_TOKEN_KW_IF, - MP_TOKEN_KW_IMPORT, // 33 + MP_TOKEN_KW_IMPORT, MP_TOKEN_KW_IN, MP_TOKEN_KW_IS, MP_TOKEN_KW_LAMBDA, @@ -91,12 +92,12 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_PASS, MP_TOKEN_KW_RAISE, MP_TOKEN_KW_RETURN, - MP_TOKEN_KW_TRY, // 43 + MP_TOKEN_KW_TRY, MP_TOKEN_KW_WHILE, MP_TOKEN_KW_WITH, MP_TOKEN_KW_YIELD, - MP_TOKEN_OP_PLUS, // 47 + MP_TOKEN_OP_PLUS, MP_TOKEN_OP_MINUS, MP_TOKEN_OP_STAR, MP_TOKEN_OP_DBL_STAR, @@ -106,7 +107,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_OP_LESS, MP_TOKEN_OP_DBL_LESS, MP_TOKEN_OP_MORE, - MP_TOKEN_OP_DBL_MORE, // 57 + MP_TOKEN_OP_DBL_MORE, MP_TOKEN_OP_AMPERSAND, MP_TOKEN_OP_PIPE, MP_TOKEN_OP_CARET, @@ -116,7 +117,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_OP_DBL_EQUAL, MP_TOKEN_OP_NOT_EQUAL, - MP_TOKEN_DEL_PAREN_OPEN, // 66 + MP_TOKEN_DEL_PAREN_OPEN, MP_TOKEN_DEL_PAREN_CLOSE, MP_TOKEN_DEL_BRACKET_OPEN, MP_TOKEN_DEL_BRACKET_CLOSE, @@ -126,7 +127,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_DEL_COLON, MP_TOKEN_DEL_PERIOD, MP_TOKEN_DEL_SEMICOLON, - MP_TOKEN_DEL_AT, // 76 + MP_TOKEN_DEL_AT, MP_TOKEN_DEL_EQUAL, MP_TOKEN_DEL_PLUS_EQUAL, MP_TOKEN_DEL_MINUS_EQUAL, @@ -136,7 +137,7 @@ typedef enum _mp_token_kind_t { 
MP_TOKEN_DEL_PERCENT_EQUAL, MP_TOKEN_DEL_AMPERSAND_EQUAL, MP_TOKEN_DEL_PIPE_EQUAL, - MP_TOKEN_DEL_CARET_EQUAL, // 86 + MP_TOKEN_DEL_CARET_EQUAL, MP_TOKEN_DEL_DBL_MORE_EQUAL, MP_TOKEN_DEL_DBL_LESS_EQUAL, MP_TOKEN_DEL_DBL_STAR_EQUAL,