f-strings: Make optional, defaulting to !CIRCUITPY_MINIMAL_BUILD
This should reclaim *most* of the code space that was added to handle f-strings. However, there may be some small code growth, because parse_string_literal takes a new parameter (which will always be 0 when f-strings are disabled, so hopefully the optimizer eliminates it).
This commit is contained in:
parent
32647cd9b4
commit
473e9c5ffb
@ -187,6 +187,9 @@ typedef long mp_off_t;
|
|||||||
#if !defined(MICROPY_CPYTHON_COMPAT)
|
#if !defined(MICROPY_CPYTHON_COMPAT)
|
||||||
#define MICROPY_CPYTHON_COMPAT (CIRCUITPY_FULL_BUILD)
|
#define MICROPY_CPYTHON_COMPAT (CIRCUITPY_FULL_BUILD)
|
||||||
#endif
|
#endif
|
||||||
|
#if !defined(MICROPY_COMP_FSTRING_LITERAL)
|
||||||
|
#define MICROPY_COMP_FSTRING_LITERAL (MICROPY_CPYTHON_COMPAT)
|
||||||
|
#endif
|
||||||
#define MICROPY_MODULE_WEAK_LINKS (CIRCUITPY_FULL_BUILD)
|
#define MICROPY_MODULE_WEAK_LINKS (CIRCUITPY_FULL_BUILD)
|
||||||
#define MICROPY_PY_ALL_SPECIAL_METHODS (CIRCUITPY_FULL_BUILD)
|
#define MICROPY_PY_ALL_SPECIAL_METHODS (CIRCUITPY_FULL_BUILD)
|
||||||
#define MICROPY_PY_BUILTINS_COMPLEX (CIRCUITPY_FULL_BUILD)
|
#define MICROPY_PY_BUILTINS_COMPLEX (CIRCUITPY_FULL_BUILD)
|
||||||
|
33
py/lexer.c
33
py/lexer.c
@ -64,9 +64,11 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
|
|||||||
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
|
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
|
STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
|
||||||
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3 || lex->chr0 == c4;
|
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3 || lex->chr0 == c4;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
STATIC bool is_char_following(mp_lexer_t *lex, byte c) {
|
STATIC bool is_char_following(mp_lexer_t *lex, byte c) {
|
||||||
return lex->chr1 == c;
|
return lex->chr1 == c;
|
||||||
@ -111,9 +113,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
|
|||||||
|
|
||||||
STATIC bool is_string_or_bytes(mp_lexer_t *lex) {
|
STATIC bool is_string_or_bytes(mp_lexer_t *lex) {
|
||||||
return is_char_or(lex, '\'', '\"')
|
return is_char_or(lex, '\'', '\"')
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
|| (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"'))
|
|| (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"'))
|
||||||
|| ((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r'))
|
|| ((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r'))
|
||||||
&& is_char_following_following_or(lex, '\'', '\"'))
|
&& is_char_following_following_or(lex, '\'', '\"'))
|
||||||
|
#else
|
||||||
|
|| (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
|
||||||
|
#endif
|
||||||
|| ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r'))
|
|| ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r'))
|
||||||
&& is_char_following_following_or(lex, '\'', '\"'));
|
&& is_char_following_following_or(lex, '\'', '\"'));
|
||||||
}
|
}
|
||||||
@ -127,6 +133,7 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
|
|||||||
return is_head_of_identifier(lex) || is_digit(lex);
|
return is_head_of_identifier(lex) || is_digit(lex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
STATIC void swap_char_banks(mp_lexer_t *lex) {
|
STATIC void swap_char_banks(mp_lexer_t *lex) {
|
||||||
if (lex->vstr_postfix_processing) {
|
if (lex->vstr_postfix_processing) {
|
||||||
lex->chr3 = lex->chr0;
|
lex->chr3 = lex->chr0;
|
||||||
@ -149,6 +156,7 @@ STATIC void swap_char_banks(mp_lexer_t *lex) {
|
|||||||
lex->vstr_postfix_idx = 0;
|
lex->vstr_postfix_idx = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
STATIC void next_char(mp_lexer_t *lex) {
|
STATIC void next_char(mp_lexer_t *lex) {
|
||||||
if (lex->chr0 == '\n') {
|
if (lex->chr0 == '\n') {
|
||||||
@ -166,13 +174,16 @@ STATIC void next_char(mp_lexer_t *lex) {
|
|||||||
lex->chr0 = lex->chr1;
|
lex->chr0 = lex->chr1;
|
||||||
lex->chr1 = lex->chr2;
|
lex->chr1 = lex->chr2;
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (lex->vstr_postfix_processing) {
|
if (lex->vstr_postfix_processing) {
|
||||||
if (lex->vstr_postfix_idx == lex->vstr_postfix.len) {
|
if (lex->vstr_postfix_idx == lex->vstr_postfix.len) {
|
||||||
lex->chr2 = '\0';
|
lex->chr2 = '\0';
|
||||||
} else {
|
} else {
|
||||||
lex->chr2 = lex->vstr_postfix.buf[lex->vstr_postfix_idx++];
|
lex->chr2 = lex->vstr_postfix.buf[lex->vstr_postfix_idx++];
|
||||||
}
|
}
|
||||||
} else {
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
lex->chr2 = lex->reader.readbyte(lex->reader.data);
|
lex->chr2 = lex->reader.readbyte(lex->reader.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,10 +201,12 @@ STATIC void next_char(mp_lexer_t *lex) {
|
|||||||
lex->chr2 = '\n';
|
lex->chr2 = '\n';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (lex->vstr_postfix_processing && lex->chr0 == '\0') {
|
if (lex->vstr_postfix_processing && lex->chr0 == '\0') {
|
||||||
lex->vstr_postfix_processing = false;
|
lex->vstr_postfix_processing = false;
|
||||||
swap_char_banks(lex);
|
swap_char_banks(lex);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC void indent_push(mp_lexer_t *lex, size_t indent) {
|
STATIC void indent_push(mp_lexer_t *lex, size_t indent) {
|
||||||
@ -334,8 +347,10 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t n_closing = 0;
|
size_t n_closing = 0;
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
bool in_expression = false;
|
bool in_expression = false;
|
||||||
bool expression_eat = true;
|
bool expression_eat = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
|
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
|
||||||
if (is_char(lex, quote_char)) {
|
if (is_char(lex, quote_char)) {
|
||||||
@ -343,6 +358,7 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
|
|||||||
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
||||||
} else {
|
} else {
|
||||||
n_closing = 0;
|
n_closing = 0;
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (is_fstring && is_char(lex, '{')) {
|
if (is_fstring && is_char(lex, '{')) {
|
||||||
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
|
||||||
in_expression = !in_expression;
|
in_expression = !in_expression;
|
||||||
@ -390,6 +406,7 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
|
|||||||
next_char(lex);
|
next_char(lex);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (is_char(lex, '\\')) {
|
if (is_char(lex, '\\')) {
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
@ -525,12 +542,14 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void mp_lexer_to_next(mp_lexer_t *lex) {
|
void mp_lexer_to_next(mp_lexer_t *lex) {
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (lex->vstr_postfix.len && !lex->vstr_postfix_processing) {
|
if (lex->vstr_postfix.len && !lex->vstr_postfix_processing) {
|
||||||
// end format call injection
|
// end format call injection
|
||||||
vstr_add_char(&lex->vstr_postfix, ')');
|
vstr_add_char(&lex->vstr_postfix, ')');
|
||||||
lex->vstr_postfix_processing = true;
|
lex->vstr_postfix_processing = true;
|
||||||
swap_char_banks(lex);
|
swap_char_banks(lex);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// start new token text
|
// start new token text
|
||||||
vstr_reset(&lex->vstr);
|
vstr_reset(&lex->vstr);
|
||||||
@ -583,13 +602,19 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
|
|||||||
// MP_TOKEN_END is used to indicate that this is the first string token
|
// MP_TOKEN_END is used to indicate that this is the first string token
|
||||||
lex->tok_kind = MP_TOKEN_END;
|
lex->tok_kind = MP_TOKEN_END;
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
bool saw_normal = false, saw_fstring = false;
|
bool saw_normal = false, saw_fstring = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Loop to accumulate string/bytes literals
|
// Loop to accumulate string/bytes literals
|
||||||
do {
|
do {
|
||||||
// parse type codes
|
// parse type codes
|
||||||
bool is_raw = false;
|
bool is_raw = false;
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
bool is_fstring = false;
|
bool is_fstring = false;
|
||||||
|
#else
|
||||||
|
const bool is_fstring = false;
|
||||||
|
#endif
|
||||||
mp_token_kind_t kind = MP_TOKEN_STRING;
|
mp_token_kind_t kind = MP_TOKEN_STRING;
|
||||||
int n_char = 0;
|
int n_char = 0;
|
||||||
if (is_char(lex, 'u')) {
|
if (is_char(lex, 'u')) {
|
||||||
@ -608,6 +633,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
|
|||||||
kind = MP_TOKEN_BYTES;
|
kind = MP_TOKEN_BYTES;
|
||||||
n_char = 2;
|
n_char = 2;
|
||||||
}
|
}
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (is_char_following(lex, 'f')) {
|
if (is_char_following(lex, 'f')) {
|
||||||
lex->tok_kind = MP_TOKEN_FSTRING_RAW;
|
lex->tok_kind = MP_TOKEN_FSTRING_RAW;
|
||||||
break;
|
break;
|
||||||
@ -619,8 +645,10 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
|
|||||||
}
|
}
|
||||||
n_char = 1;
|
n_char = 1;
|
||||||
is_fstring = true;
|
is_fstring = true;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
if (is_fstring) {
|
if (is_fstring) {
|
||||||
saw_fstring = true;
|
saw_fstring = true;
|
||||||
} else {
|
} else {
|
||||||
@ -631,6 +659,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
|
|||||||
// Can't concatenate f-string with normal string
|
// Can't concatenate f-string with normal string
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Set or check token kind
|
// Set or check token kind
|
||||||
if (lex->tok_kind == MP_TOKEN_END) {
|
if (lex->tok_kind == MP_TOKEN_END) {
|
||||||
@ -808,7 +837,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
|
|||||||
lex->num_indent_level = 1;
|
lex->num_indent_level = 1;
|
||||||
lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
|
lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
|
||||||
vstr_init(&lex->vstr, 32);
|
vstr_init(&lex->vstr, 32);
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
vstr_init(&lex->vstr_postfix, 0);
|
vstr_init(&lex->vstr_postfix, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
// store sentinel for first indentation level
|
// store sentinel for first indentation level
|
||||||
lex->indent_level[0] = 0;
|
lex->indent_level[0] = 0;
|
||||||
|
@ -44,12 +44,14 @@ typedef enum _mp_token_kind_t {
|
|||||||
MP_TOKEN_INVALID,
|
MP_TOKEN_INVALID,
|
||||||
MP_TOKEN_DEDENT_MISMATCH,
|
MP_TOKEN_DEDENT_MISMATCH,
|
||||||
MP_TOKEN_LONELY_STRING_OPEN,
|
MP_TOKEN_LONELY_STRING_OPEN,
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
MP_TOKEN_FSTRING_BACKSLASH,
|
MP_TOKEN_FSTRING_BACKSLASH,
|
||||||
MP_TOKEN_FSTRING_COMMENT,
|
MP_TOKEN_FSTRING_COMMENT,
|
||||||
MP_TOKEN_FSTRING_UNCLOSED,
|
MP_TOKEN_FSTRING_UNCLOSED,
|
||||||
MP_TOKEN_FSTRING_UNOPENED,
|
MP_TOKEN_FSTRING_UNOPENED,
|
||||||
MP_TOKEN_FSTRING_EMPTY_EXP,
|
MP_TOKEN_FSTRING_EMPTY_EXP,
|
||||||
MP_TOKEN_FSTRING_RAW,
|
MP_TOKEN_FSTRING_RAW,
|
||||||
|
#endif
|
||||||
|
|
||||||
MP_TOKEN_NEWLINE,
|
MP_TOKEN_NEWLINE,
|
||||||
MP_TOKEN_INDENT,
|
MP_TOKEN_INDENT,
|
||||||
@ -156,7 +158,9 @@ typedef struct _mp_lexer_t {
|
|||||||
mp_reader_t reader; // stream source
|
mp_reader_t reader; // stream source
|
||||||
|
|
||||||
unichar chr0, chr1, chr2; // current cached characters from source
|
unichar chr0, chr1, chr2; // current cached characters from source
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
unichar chr3, chr4, chr5; // current cached characters from alt source
|
unichar chr3, chr4, chr5; // current cached characters from alt source
|
||||||
|
#endif
|
||||||
|
|
||||||
size_t line; // current source line
|
size_t line; // current source line
|
||||||
size_t column; // current source column
|
size_t column; // current source column
|
||||||
@ -172,9 +176,11 @@ typedef struct _mp_lexer_t {
|
|||||||
size_t tok_column; // token source column
|
size_t tok_column; // token source column
|
||||||
mp_token_kind_t tok_kind; // token kind
|
mp_token_kind_t tok_kind; // token kind
|
||||||
vstr_t vstr; // token data
|
vstr_t vstr; // token data
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
vstr_t vstr_postfix; // postfix to apply to string
|
vstr_t vstr_postfix; // postfix to apply to string
|
||||||
bool vstr_postfix_processing;
|
bool vstr_postfix_processing;
|
||||||
uint16_t vstr_postfix_idx;
|
uint16_t vstr_postfix_idx;
|
||||||
|
#endif
|
||||||
} mp_lexer_t;
|
} mp_lexer_t;
|
||||||
|
|
||||||
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
|
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
|
||||||
|
@ -377,6 +377,11 @@
|
|||||||
#define MICROPY_COMP_RETURN_IF_EXPR (0)
|
#define MICROPY_COMP_RETURN_IF_EXPR (0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Whether to include parsing of f-string literals
|
||||||
|
#ifndef MICROPY_COMP_FSTRING_LITERAL
|
||||||
|
#define MICROPY_COMP_FSTRING_LITERAL (1)
|
||||||
|
#endif
|
||||||
|
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
/* Internal debugging stuff */
|
/* Internal debugging stuff */
|
||||||
|
|
||||||
|
@ -1178,6 +1178,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
|||||||
exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
|
exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
|
||||||
translate("unindent does not match any outer indentation level"));
|
translate("unindent does not match any outer indentation level"));
|
||||||
break;
|
break;
|
||||||
|
#if MICROPY_COMP_FSTRING_LITERAL
|
||||||
#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_DETAILED
|
#if MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_DETAILED
|
||||||
case MP_TOKEN_FSTRING_BACKSLASH:
|
case MP_TOKEN_FSTRING_BACKSLASH:
|
||||||
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
||||||
@ -1213,6 +1214,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
|
|||||||
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
||||||
translate("malformed f-string"));
|
translate("malformed f-string"));
|
||||||
break;
|
break;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
|
||||||
|
Loading…
Reference in New Issue
Block a user