Improve indent/dedent error checking and reporting.
This commit is contained in:
parent
ff8ed77cc1
commit
91d387de7d
@ -2192,7 +2192,14 @@ void compile_node(compiler_t *comp, py_parse_node_t pn) {
|
|||||||
case PY_PARSE_NODE_DECIMAL: EMIT(load_const_dec, arg); break;
|
case PY_PARSE_NODE_DECIMAL: EMIT(load_const_dec, arg); break;
|
||||||
case PY_PARSE_NODE_STRING: EMIT(load_const_str, arg, false); break;
|
case PY_PARSE_NODE_STRING: EMIT(load_const_str, arg, false); break;
|
||||||
case PY_PARSE_NODE_BYTES: EMIT(load_const_str, arg, true); break;
|
case PY_PARSE_NODE_BYTES: EMIT(load_const_str, arg, true); break;
|
||||||
case PY_PARSE_NODE_TOKEN: EMIT(load_const_tok, arg); break;
|
case PY_PARSE_NODE_TOKEN:
|
||||||
|
if (arg == PY_TOKEN_NEWLINE) {
|
||||||
|
// this can occur when file_input lets through a NEWLINE (eg if file starts with a newline)
|
||||||
|
// do nothing
|
||||||
|
} else {
|
||||||
|
EMIT(load_const_tok, arg);
|
||||||
|
}
|
||||||
|
break;
|
||||||
default: assert(0);
|
default: assert(0);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
22
py/lexer.c
22
py/lexer.c
@ -331,9 +331,7 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
|
|||||||
tok->kind = PY_TOKEN_INDENT;
|
tok->kind = PY_TOKEN_INDENT;
|
||||||
lex->emit_dent -= 1;
|
lex->emit_dent -= 1;
|
||||||
|
|
||||||
} else if (had_physical_newline && lex->nested_bracket_level == 0
|
} else if (had_physical_newline && lex->nested_bracket_level == 0) {
|
||||||
&& tok != &lex->tok_cur // so that we don't emit a newline if file starts with a comment
|
|
||||||
) {
|
|
||||||
tok->kind = PY_TOKEN_NEWLINE;
|
tok->kind = PY_TOKEN_NEWLINE;
|
||||||
|
|
||||||
uint num_spaces = lex->column - 1;
|
uint num_spaces = lex->column - 1;
|
||||||
@ -348,12 +346,11 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
|
|||||||
lex->emit_dent -= 1;
|
lex->emit_dent -= 1;
|
||||||
}
|
}
|
||||||
if (num_spaces != indent_top(lex)) {
|
if (num_spaces != indent_top(lex)) {
|
||||||
//SyntaxError
|
tok->kind = PY_TOKEN_DEDENT_MISMATCH;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (is_end(lex)) {
|
} else if (is_end(lex)) {
|
||||||
// TODO emit a newline if file does not end in one
|
|
||||||
if (indent_top(lex) > 0) {
|
if (indent_top(lex) > 0) {
|
||||||
tok->kind = PY_TOKEN_NEWLINE;
|
tok->kind = PY_TOKEN_NEWLINE;
|
||||||
lex->emit_dent = 0;
|
lex->emit_dent = 0;
|
||||||
@ -613,7 +610,15 @@ py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint le
|
|||||||
}
|
}
|
||||||
|
|
||||||
py_lexer_next_token_into(lex, &lex->tok_cur);
|
py_lexer_next_token_into(lex, &lex->tok_cur);
|
||||||
py_lexer_next_token_into(lex, &lex->tok_next);
|
|
||||||
|
// check that the first token is in the first column
|
||||||
|
// (done to get equivalence with CPython)
|
||||||
|
if (lex->tok_cur.src_line == 1 && lex->tok_cur.src_column != 1) {
|
||||||
|
lex->tok_next = lex->tok_cur;
|
||||||
|
lex->tok_cur.kind = PY_TOKEN_INDENT;
|
||||||
|
} else {
|
||||||
|
py_lexer_next_token_into(lex, &lex->tok_next);
|
||||||
|
}
|
||||||
|
|
||||||
return lex;
|
return lex;
|
||||||
}
|
}
|
||||||
@ -675,3 +680,8 @@ bool py_lexer_opt_str(py_lexer_t *lex, const char *str) {
|
|||||||
bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
|
bool py_lexer_show_error(py_lexer_t *lex, const char *msg) {
|
||||||
return py_token_show_error(&lex->tok_cur, msg);
|
return py_token_show_error(&lex->tok_cur, msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg) {
|
||||||
|
printf(" File \"%s\", line %d column %d\n%s\n", lex->tok_cur.src_name, lex->tok_cur.src_line, lex->tok_cur.src_column, msg);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
28
py/lexer.h
28
py/lexer.h
@ -12,20 +12,21 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_END, // 0
|
PY_TOKEN_END, // 0
|
||||||
|
|
||||||
PY_TOKEN_INVALID,
|
PY_TOKEN_INVALID,
|
||||||
|
PY_TOKEN_DEDENT_MISMATCH,
|
||||||
PY_TOKEN_LONELY_STRING_OPEN,
|
PY_TOKEN_LONELY_STRING_OPEN,
|
||||||
|
|
||||||
PY_TOKEN_NEWLINE, // 3
|
PY_TOKEN_NEWLINE, // 4
|
||||||
PY_TOKEN_INDENT, // 4
|
PY_TOKEN_INDENT, // 5
|
||||||
PY_TOKEN_DEDENT, // 5
|
PY_TOKEN_DEDENT, // 6
|
||||||
|
|
||||||
PY_TOKEN_NAME, // 6
|
PY_TOKEN_NAME, // 7
|
||||||
PY_TOKEN_NUMBER,
|
PY_TOKEN_NUMBER,
|
||||||
PY_TOKEN_STRING,
|
PY_TOKEN_STRING,
|
||||||
PY_TOKEN_BYTES,
|
PY_TOKEN_BYTES,
|
||||||
|
|
||||||
PY_TOKEN_ELLIPSES,
|
PY_TOKEN_ELLIPSES,
|
||||||
|
|
||||||
PY_TOKEN_KW_FALSE, // 11
|
PY_TOKEN_KW_FALSE, // 12
|
||||||
PY_TOKEN_KW_NONE,
|
PY_TOKEN_KW_NONE,
|
||||||
PY_TOKEN_KW_TRUE,
|
PY_TOKEN_KW_TRUE,
|
||||||
PY_TOKEN_KW_AND,
|
PY_TOKEN_KW_AND,
|
||||||
@ -34,7 +35,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_KW_BREAK,
|
PY_TOKEN_KW_BREAK,
|
||||||
PY_TOKEN_KW_CLASS,
|
PY_TOKEN_KW_CLASS,
|
||||||
PY_TOKEN_KW_CONTINUE,
|
PY_TOKEN_KW_CONTINUE,
|
||||||
PY_TOKEN_KW_DEF, // 20
|
PY_TOKEN_KW_DEF, // 21
|
||||||
PY_TOKEN_KW_DEL,
|
PY_TOKEN_KW_DEL,
|
||||||
PY_TOKEN_KW_ELIF,
|
PY_TOKEN_KW_ELIF,
|
||||||
PY_TOKEN_KW_ELSE,
|
PY_TOKEN_KW_ELSE,
|
||||||
@ -44,7 +45,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_KW_FROM,
|
PY_TOKEN_KW_FROM,
|
||||||
PY_TOKEN_KW_GLOBAL,
|
PY_TOKEN_KW_GLOBAL,
|
||||||
PY_TOKEN_KW_IF,
|
PY_TOKEN_KW_IF,
|
||||||
PY_TOKEN_KW_IMPORT, // 30
|
PY_TOKEN_KW_IMPORT, // 31
|
||||||
PY_TOKEN_KW_IN,
|
PY_TOKEN_KW_IN,
|
||||||
PY_TOKEN_KW_IS,
|
PY_TOKEN_KW_IS,
|
||||||
PY_TOKEN_KW_LAMBDA,
|
PY_TOKEN_KW_LAMBDA,
|
||||||
@ -54,12 +55,12 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_KW_PASS,
|
PY_TOKEN_KW_PASS,
|
||||||
PY_TOKEN_KW_RAISE,
|
PY_TOKEN_KW_RAISE,
|
||||||
PY_TOKEN_KW_RETURN,
|
PY_TOKEN_KW_RETURN,
|
||||||
PY_TOKEN_KW_TRY, // 40
|
PY_TOKEN_KW_TRY, // 41
|
||||||
PY_TOKEN_KW_WHILE,
|
PY_TOKEN_KW_WHILE,
|
||||||
PY_TOKEN_KW_WITH,
|
PY_TOKEN_KW_WITH,
|
||||||
PY_TOKEN_KW_YIELD,
|
PY_TOKEN_KW_YIELD,
|
||||||
|
|
||||||
PY_TOKEN_OP_PLUS, // 44
|
PY_TOKEN_OP_PLUS, // 45
|
||||||
PY_TOKEN_OP_MINUS,
|
PY_TOKEN_OP_MINUS,
|
||||||
PY_TOKEN_OP_STAR,
|
PY_TOKEN_OP_STAR,
|
||||||
PY_TOKEN_OP_DBL_STAR,
|
PY_TOKEN_OP_DBL_STAR,
|
||||||
@ -69,7 +70,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_OP_LESS,
|
PY_TOKEN_OP_LESS,
|
||||||
PY_TOKEN_OP_DBL_LESS,
|
PY_TOKEN_OP_DBL_LESS,
|
||||||
PY_TOKEN_OP_MORE,
|
PY_TOKEN_OP_MORE,
|
||||||
PY_TOKEN_OP_DBL_MORE, // 54
|
PY_TOKEN_OP_DBL_MORE, // 55
|
||||||
PY_TOKEN_OP_AMPERSAND,
|
PY_TOKEN_OP_AMPERSAND,
|
||||||
PY_TOKEN_OP_PIPE,
|
PY_TOKEN_OP_PIPE,
|
||||||
PY_TOKEN_OP_CARET,
|
PY_TOKEN_OP_CARET,
|
||||||
@ -79,7 +80,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_OP_DBL_EQUAL,
|
PY_TOKEN_OP_DBL_EQUAL,
|
||||||
PY_TOKEN_OP_NOT_EQUAL,
|
PY_TOKEN_OP_NOT_EQUAL,
|
||||||
|
|
||||||
PY_TOKEN_DEL_PAREN_OPEN, // 63
|
PY_TOKEN_DEL_PAREN_OPEN, // 64
|
||||||
PY_TOKEN_DEL_PAREN_CLOSE,
|
PY_TOKEN_DEL_PAREN_CLOSE,
|
||||||
PY_TOKEN_DEL_BRACKET_OPEN,
|
PY_TOKEN_DEL_BRACKET_OPEN,
|
||||||
PY_TOKEN_DEL_BRACKET_CLOSE,
|
PY_TOKEN_DEL_BRACKET_CLOSE,
|
||||||
@ -89,7 +90,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_DEL_COLON,
|
PY_TOKEN_DEL_COLON,
|
||||||
PY_TOKEN_DEL_PERIOD,
|
PY_TOKEN_DEL_PERIOD,
|
||||||
PY_TOKEN_DEL_SEMICOLON,
|
PY_TOKEN_DEL_SEMICOLON,
|
||||||
PY_TOKEN_DEL_AT, // 73
|
PY_TOKEN_DEL_AT, // 74
|
||||||
PY_TOKEN_DEL_EQUAL,
|
PY_TOKEN_DEL_EQUAL,
|
||||||
PY_TOKEN_DEL_PLUS_EQUAL,
|
PY_TOKEN_DEL_PLUS_EQUAL,
|
||||||
PY_TOKEN_DEL_MINUS_EQUAL,
|
PY_TOKEN_DEL_MINUS_EQUAL,
|
||||||
@ -99,7 +100,7 @@ typedef enum _py_token_kind_t {
|
|||||||
PY_TOKEN_DEL_PERCENT_EQUAL,
|
PY_TOKEN_DEL_PERCENT_EQUAL,
|
||||||
PY_TOKEN_DEL_AMPERSAND_EQUAL,
|
PY_TOKEN_DEL_AMPERSAND_EQUAL,
|
||||||
PY_TOKEN_DEL_PIPE_EQUAL,
|
PY_TOKEN_DEL_PIPE_EQUAL,
|
||||||
PY_TOKEN_DEL_CARET_EQUAL, // 83
|
PY_TOKEN_DEL_CARET_EQUAL, // 84
|
||||||
PY_TOKEN_DEL_DBL_MORE_EQUAL,
|
PY_TOKEN_DEL_DBL_MORE_EQUAL,
|
||||||
PY_TOKEN_DEL_DBL_LESS_EQUAL,
|
PY_TOKEN_DEL_DBL_LESS_EQUAL,
|
||||||
PY_TOKEN_DEL_DBL_STAR_EQUAL,
|
PY_TOKEN_DEL_DBL_STAR_EQUAL,
|
||||||
@ -137,5 +138,6 @@ bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
|
|||||||
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
|
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
|
||||||
*/
|
*/
|
||||||
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
|
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
|
||||||
|
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);
|
||||||
|
|
||||||
#endif /* INCLUDED_LEXER_H */
|
#endif /* INCLUDED_LEXER_H */
|
||||||
|
12
py/main.c
12
py/main.c
@ -31,11 +31,13 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
py_parse_node_t pn = py_parse(lex, 0);
|
py_parse_node_t pn = py_parse(lex, 0);
|
||||||
//printf("----------------\n");
|
if (pn != PY_PARSE_NODE_NULL) {
|
||||||
//parse_node_show(pn, 0);
|
//printf("----------------\n");
|
||||||
//printf("----------------\n");
|
parse_node_show(pn, 0);
|
||||||
py_compile(pn);
|
//printf("----------------\n");
|
||||||
//printf("----------------\n");
|
py_compile(pn);
|
||||||
|
//printf("----------------\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
py_lexer_free(lex);
|
py_lexer_free(lex);
|
||||||
|
18
py/parse.c
18
py/parse.c
@ -545,10 +545,12 @@ py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) {
|
|||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check we are at the end of the token stream
|
||||||
if (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
|
if (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
|
||||||
py_lexer_show_error(lex, "unexpected token at end:");
|
goto syntax_error;
|
||||||
py_token_show(py_lexer_cur(lex));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//printf("--------------\n");
|
//printf("--------------\n");
|
||||||
//result_stack_show(parser);
|
//result_stack_show(parser);
|
||||||
assert(parser->result_stack_top == 1);
|
assert(parser->result_stack_top == 1);
|
||||||
@ -557,10 +559,16 @@ py_parse_node_t py_parse(py_lexer_t *lex, int wanted_rule) {
|
|||||||
return parser->result_stack[0];
|
return parser->result_stack[0];
|
||||||
|
|
||||||
syntax_error:
|
syntax_error:
|
||||||
py_lexer_show_error(lex, "syntax error:");
|
if (py_lexer_is_kind(lex, PY_TOKEN_INDENT)) {
|
||||||
|
py_lexer_show_error_pythonic(lex, "IndentationError: unexpected indent");
|
||||||
|
} else if (py_lexer_is_kind(lex, PY_TOKEN_DEDENT_MISMATCH)) {
|
||||||
|
py_lexer_show_error_pythonic(lex, "IndentationError: unindent does not match any outer indentation level");
|
||||||
|
} else {
|
||||||
|
py_lexer_show_error_pythonic(lex, "syntax error:");
|
||||||
#ifdef USE_RULE_NAME
|
#ifdef USE_RULE_NAME
|
||||||
py_lexer_show_error(lex, rule->rule_name);
|
py_lexer_show_error(lex, rule->rule_name);
|
||||||
#endif
|
#endif
|
||||||
py_token_show(py_lexer_cur(lex));
|
py_token_show(py_lexer_cur(lex));
|
||||||
|
}
|
||||||
return PY_PARSE_NODE_NULL;
|
return PY_PARSE_NODE_NULL;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user