From 473e9c5ffb081ae2750948099ac8288b039d4255 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Mon, 9 Mar 2020 09:02:47 -0500 Subject: [PATCH] f-strings: Make optional, defaulting to !CIRCUITPY_MINIMAL_BUILD This should reclaim *most* code space added to handle f-strings. However, there may be some small code growth as parse_string_literal takes a new parameter (which will always be 0, so hopefully the optimizer eliminates it) --- py/circuitpy_mpconfig.h | 3 +++ py/lexer.c | 33 ++++++++++++++++++++++++++++++++- py/lexer.h | 6 ++++++ py/mpconfig.h | 5 +++++ py/parse.c | 2 ++ 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/py/circuitpy_mpconfig.h b/py/circuitpy_mpconfig.h index 61dd901ed5..acff27bcf5 100644 --- a/py/circuitpy_mpconfig.h +++ b/py/circuitpy_mpconfig.h @@ -187,6 +187,9 @@ typedef long mp_off_t; #if !defined(MICROPY_CPYTHON_COMPAT) #define MICROPY_CPYTHON_COMPAT (CIRCUITPY_FULL_BUILD) #endif +#if !defined(MICROPY_COMP_FSTRING_LITERAL) +#define MICROPY_COMP_FSTRING_LITERAL (MICROPY_CPYTHON_COMPAT) +#endif #define MICROPY_MODULE_WEAK_LINKS (CIRCUITPY_FULL_BUILD) #define MICROPY_PY_ALL_SPECIAL_METHODS (CIRCUITPY_FULL_BUILD) #define MICROPY_PY_BUILTINS_COMPLEX (CIRCUITPY_FULL_BUILD) diff --git a/py/lexer.c b/py/lexer.c index 80f8f043c7..00cd59bcae 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -64,9 +64,11 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) { return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3; } +#if MICROPY_COMP_FSTRING_LITERAL STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) { return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3 || lex->chr0 == c4; } +#endif STATIC bool is_char_following(mp_lexer_t *lex, byte c) { return lex->chr1 == c; @@ -111,9 +113,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) { STATIC bool is_string_or_bytes(mp_lexer_t *lex) { return is_char_or(lex, '\'', '\"') +#if MICROPY_COMP_FSTRING_LITERAL || (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"')) || ((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r')) && is_char_following_following_or(lex, '\'', '\"')) +#else + || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"')) +#endif || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"')); } @@ -127,6 +133,7 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) { return is_head_of_identifier(lex) || is_digit(lex); } +#if MICROPY_COMP_FSTRING_LITERAL STATIC void swap_char_banks(mp_lexer_t *lex) { if (lex->vstr_postfix_processing) { lex->chr3 = lex->chr0; @@ -149,6 +156,7 @@ STATIC void swap_char_banks(mp_lexer_t *lex) { lex->vstr_postfix_idx = 0; } } +#endif STATIC void next_char(mp_lexer_t *lex) { if (lex->chr0 == '\n') { @@ -166,13 +174,16 @@ STATIC void next_char(mp_lexer_t *lex) { lex->chr0 = lex->chr1; lex->chr1 = lex->chr2; +#if MICROPY_COMP_FSTRING_LITERAL if (lex->vstr_postfix_processing) { if (lex->vstr_postfix_idx == lex->vstr_postfix.len) { lex->chr2 = '\0'; } else { lex->chr2 = lex->vstr_postfix.buf[lex->vstr_postfix_idx++]; } - } else { + } else +#endif + { lex->chr2 = lex->reader.readbyte(lex->reader.data); } @@ -190,10 +201,12 @@ STATIC void next_char(mp_lexer_t *lex) { lex->chr2 = '\n'; } +#if MICROPY_COMP_FSTRING_LITERAL if (lex->vstr_postfix_processing && lex->chr0 == '\0') { lex->vstr_postfix_processing = false; swap_char_banks(lex); } +#endif } STATIC void indent_push(mp_lexer_t *lex, size_t indent) { @@ -334,8 +347,10 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) } size_t n_closing = 0; +#if MICROPY_COMP_FSTRING_LITERAL bool in_expression = false; bool expression_eat = true; +#endif while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) { if (is_char(lex, quote_char)) { @@ -343,6 +358,7 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) vstr_add_char(&lex->vstr, CUR_CHAR(lex)); } else { n_closing = 0; +#if MICROPY_COMP_FSTRING_LITERAL if (is_fstring && is_char(lex, '{')) { vstr_add_char(&lex->vstr, CUR_CHAR(lex)); in_expression = !in_expression; @@ -390,6 +406,7 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) next_char(lex); continue; } +#endif if (is_char(lex, '\\')) { next_char(lex); @@ -525,12 +542,14 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) { } void mp_lexer_to_next(mp_lexer_t *lex) { +#if MICROPY_COMP_FSTRING_LITERAL if (lex->vstr_postfix.len && !lex->vstr_postfix_processing) { // end format call injection vstr_add_char(&lex->vstr_postfix, ')'); lex->vstr_postfix_processing = true; swap_char_banks(lex); } +#endif // start new token text vstr_reset(&lex->vstr); @@ -583,13 +602,19 @@ void mp_lexer_to_next(mp_lexer_t *lex) { // MP_TOKEN_END is used to indicate that this is the first string token lex->tok_kind = MP_TOKEN_END; +#if MICROPY_COMP_FSTRING_LITERAL bool saw_normal = false, saw_fstring = false; +#endif // Loop to accumulate string/bytes literals do { // parse type codes bool is_raw = false; +#if MICROPY_COMP_FSTRING_LITERAL bool is_fstring = false; +#else + const bool is_fstring = false; +#endif mp_token_kind_t kind = MP_TOKEN_STRING; int n_char = 0; if (is_char(lex, 'u')) { @@ -608,6 +633,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) { kind = MP_TOKEN_BYTES; n_char = 2; } +#if MICROPY_COMP_FSTRING_LITERAL if (is_char_following(lex, 'f')) { lex->tok_kind = MP_TOKEN_FSTRING_RAW; break; @@ -619,8 +645,10 @@ void mp_lexer_to_next(mp_lexer_t *lex) { } n_char = 1; is_fstring = true; +#endif } +#if MICROPY_COMP_FSTRING_LITERAL if (is_fstring) { saw_fstring = true; } else { @@ -631,6 +659,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) { // Can't concatenate f-string with normal string break; } +#endif // Set or check token kind if (lex->tok_kind == MP_TOKEN_END) { @@ -808,7 +837,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) { lex->num_indent_level = 1; lex->indent_level = m_new(uint16_t, lex->alloc_indent_level); vstr_init(&lex->vstr, 32); +#if MICROPY_COMP_FSTRING_LITERAL vstr_init(&lex->vstr_postfix, 0); +#endif // store sentinel for first indentation level lex->indent_level[0] = 0; diff --git a/py/lexer.h b/py/lexer.h index 7fe271e841..a3eaa2a7e6 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -44,12 +44,14 @@ typedef enum _mp_token_kind_t { MP_TOKEN_INVALID, MP_TOKEN_DEDENT_MISMATCH, MP_TOKEN_LONELY_STRING_OPEN, +#if MICROPY_COMP_FSTRING_LITERAL MP_TOKEN_FSTRING_BACKSLASH, MP_TOKEN_FSTRING_COMMENT, MP_TOKEN_FSTRING_UNCLOSED, MP_TOKEN_FSTRING_UNOPENED, MP_TOKEN_FSTRING_EMPTY_EXP, MP_TOKEN_FSTRING_RAW, +#endif MP_TOKEN_NEWLINE, MP_TOKEN_INDENT, @@ -156,7 +158,9 @@ typedef struct _mp_lexer_t { mp_reader_t reader; // stream source unichar chr0, chr1, chr2; // current cached characters from source +#if MICROPY_COMP_FSTRING_LITERAL unichar chr3, chr4, chr5; // current cached characters from alt source +#endif size_t line; // current source line size_t column; // current source column @@ -172,9 +176,11 @@ typedef struct _mp_lexer_t { size_t tok_column; // token source column mp_token_kind_t tok_kind; // token kind vstr_t vstr; // token data +#if MICROPY_COMP_FSTRING_LITERAL vstr_t vstr_postfix; // postfix to apply to string bool vstr_postfix_processing; uint16_t vstr_postfix_idx; +#endif } mp_lexer_t; mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader); diff --git a/py/mpconfig.h b/py/mpconfig.h index 8f78ade5cc..01572f546e 100755 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -377,6 +377,11 @@ #define MICROPY_COMP_RETURN_IF_EXPR (0) #endif +// Whether to include parsing of f-string literals +#ifndef MICROPY_COMP_FSTRING_LITERAL +#define MICROPY_COMP_FSTRING_LITERAL (1) +#endif + /*****************************************************************************/ /* Internal debugging stuff */ diff --git a/py/parse.c b/py/parse.c index f86ee25fe0..b8cfda2cb5 100644 --- a/py/parse.c +++ b/py/parse.c @@ -1178,6 +1178,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { exc = mp_obj_new_exception_msg(&mp_type_IndentationError, translate("unindent does not match any outer indentation level")); break; +#if MICROPY_COMP_FSTRING_LITERAL #if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_DETAILED case MP_TOKEN_FSTRING_BACKSLASH: exc = mp_obj_new_exception_msg(&mp_type_SyntaxError, @@ -1213,6 +1214,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { exc = mp_obj_new_exception_msg(&mp_type_SyntaxError, translate("malformed f-string")); break; +#endif #endif default: exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,