From a91f41407bae9d50213ad9dcae5c21fc7551750f Mon Sep 17 00:00:00 2001 From: Damien George Date: Thu, 10 Apr 2014 11:30:55 +0100 Subject: [PATCH] py, lexer: Fix parsing of raw strings (allow escaping of quote). --- py/lexer.c | 83 +++++++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/py/lexer.c b/py/lexer.c index 58d54b6980..3487e69e8a 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -455,50 +455,55 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs vstr_add_char(&lex->vstr, CUR_CHAR(lex)); } else { n_closing = 0; - if (!is_raw && is_char(lex, '\\')) { + if (is_char(lex, '\\')) { next_char(lex); unichar c = CUR_CHAR(lex); - switch (c) { - case MP_LEXER_CHAR_EOF: break; // TODO a proper error message? - case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it - case '\\': break; - case '\'': break; - case '"': break; - case 'a': c = 0x07; break; - case 'b': c = 0x08; break; - case 't': c = 0x09; break; - case 'n': c = 0x0a; break; - case 'v': c = 0x0b; break; - case 'f': c = 0x0c; break; - case 'r': c = 0x0d; break; - case 'x': - { - uint num = 0; - if (!get_hex(lex, 2, &num)) { - // TODO error message - assert(0); - } - c = num; - break; - } - case 'N': break; // TODO \N{name} only in strings - case 'u': break; // TODO \uxxxx only in strings - case 'U': break; // TODO \Uxxxxxxxx only in strings - default: - if (c >= '0' && c <= '7') { - // Octal sequence, 1-3 chars - int digits = 3; - int num = c - '0'; - while (is_following_odigit(lex) && --digits != 0) { - next_char(lex); - num = num * 8 + (CUR_CHAR(lex) - '0'); + if (is_raw) { + // raw strings allow escaping of quotes, but the backslash is also emitted + vstr_add_char(&lex->vstr, '\\'); + } else { + switch (c) { + case MP_LEXER_CHAR_EOF: break; // TODO a proper error message? + case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it + case '\\': break; + case '\'': break; + case '"': break; + case 'a': c = 0x07; break; + case 'b': c = 0x08; break; + case 't': c = 0x09; break; + case 'n': c = 0x0a; break; + case 'v': c = 0x0b; break; + case 'f': c = 0x0c; break; + case 'r': c = 0x0d; break; + case 'x': + { + uint num = 0; + if (!get_hex(lex, 2, &num)) { + // TODO error message + assert(0); } c = num; - } else { - // unrecognised escape character; CPython lets this through verbatim as '\' and then the character - vstr_add_char(&lex->vstr, '\\'); + break; } - break; + case 'N': break; // TODO \N{name} only in strings + case 'u': break; // TODO \uxxxx only in strings + case 'U': break; // TODO \Uxxxxxxxx only in strings + default: + if (c >= '0' && c <= '7') { + // Octal sequence, 1-3 chars + int digits = 3; + int num = c - '0'; + while (is_following_odigit(lex) && --digits != 0) { + next_char(lex); + num = num * 8 + (CUR_CHAR(lex) - '0'); + } + c = num; + } else { + // unrecognised escape character; CPython lets this through verbatim as '\' and then the character + vstr_add_char(&lex->vstr, '\\'); + } + break; + } } if (c != MP_LEXER_CHAR_EOF) { vstr_add_char(&lex->vstr, c);