2013-10-04 14:53:11 -04:00
|
|
|
/* lexer.h -- simple tokeniser for Python implementation
 */
|
|
|
|
|
|
|
|
#ifndef INCLUDED_LEXER_H
|
|
|
|
#define INCLUDED_LEXER_H
|
|
|
|
|
|
|
|
/* The lexer uses a (byte) length instead of null termination.
 * Tokens are the same: UTF-8 text with a (byte) length.
 */
|
|
|
|
|
|
|
|
// Kinds of token reported in py_token_t.kind.
//
// Enumerators take consecutive values starting at 0; the numeric comments
// record the resulting value of the enumerator they sit next to.
// NOTE(review): code outside this header may rely on these values or on the
// relative order of the enumerators -- confirm before inserting or reordering.
typedef enum _py_token_kind_t {
    PY_TOKEN_END = 0,               // 0

    PY_TOKEN_INVALID,
    PY_TOKEN_DEDENT_MISMATCH,
    PY_TOKEN_LONELY_STRING_OPEN,

    PY_TOKEN_NEWLINE,               // 4
    PY_TOKEN_INDENT,                // 5
    PY_TOKEN_DEDENT,                // 6

    PY_TOKEN_NAME,                  // 7
    PY_TOKEN_NUMBER,
    PY_TOKEN_STRING,
    PY_TOKEN_BYTES,

    PY_TOKEN_ELLIPSES,

    // keywords (PY_TOKEN_KW_*)
    PY_TOKEN_KW_FALSE,              // 12
    PY_TOKEN_KW_NONE,
    PY_TOKEN_KW_TRUE,
    PY_TOKEN_KW_AND,
    PY_TOKEN_KW_AS,
    PY_TOKEN_KW_ASSERT,
    PY_TOKEN_KW_BREAK,
    PY_TOKEN_KW_CLASS,
    PY_TOKEN_KW_CONTINUE,
    PY_TOKEN_KW_DEF,                // 21
    PY_TOKEN_KW_DEL,
    PY_TOKEN_KW_ELIF,
    PY_TOKEN_KW_ELSE,
    PY_TOKEN_KW_EXCEPT,
    PY_TOKEN_KW_FINALLY,
    PY_TOKEN_KW_FOR,
    PY_TOKEN_KW_FROM,
    PY_TOKEN_KW_GLOBAL,
    PY_TOKEN_KW_IF,
    PY_TOKEN_KW_IMPORT,             // 31
    PY_TOKEN_KW_IN,
    PY_TOKEN_KW_IS,
    PY_TOKEN_KW_LAMBDA,
    PY_TOKEN_KW_NONLOCAL,
    PY_TOKEN_KW_NOT,
    PY_TOKEN_KW_OR,
    PY_TOKEN_KW_PASS,
    PY_TOKEN_KW_RAISE,
    PY_TOKEN_KW_RETURN,
    PY_TOKEN_KW_TRY,                // 41
    PY_TOKEN_KW_WHILE,
    PY_TOKEN_KW_WITH,
    PY_TOKEN_KW_YIELD,

    // operators (PY_TOKEN_OP_*)
    PY_TOKEN_OP_PLUS,               // 45
    PY_TOKEN_OP_MINUS,
    PY_TOKEN_OP_STAR,
    PY_TOKEN_OP_DBL_STAR,
    PY_TOKEN_OP_SLASH,
    PY_TOKEN_OP_DBL_SLASH,
    PY_TOKEN_OP_PERCENT,
    PY_TOKEN_OP_LESS,
    PY_TOKEN_OP_DBL_LESS,
    PY_TOKEN_OP_MORE,
    PY_TOKEN_OP_DBL_MORE,           // 55
    PY_TOKEN_OP_AMPERSAND,
    PY_TOKEN_OP_PIPE,
    PY_TOKEN_OP_CARET,
    PY_TOKEN_OP_TILDE,
    PY_TOKEN_OP_LESS_EQUAL,
    PY_TOKEN_OP_MORE_EQUAL,
    PY_TOKEN_OP_DBL_EQUAL,
    PY_TOKEN_OP_NOT_EQUAL,

    // delimiters (PY_TOKEN_DEL_*)
    PY_TOKEN_DEL_PAREN_OPEN,        // 64
    PY_TOKEN_DEL_PAREN_CLOSE,
    PY_TOKEN_DEL_BRACKET_OPEN,
    PY_TOKEN_DEL_BRACKET_CLOSE,
    PY_TOKEN_DEL_BRACE_OPEN,
    PY_TOKEN_DEL_BRACE_CLOSE,
    PY_TOKEN_DEL_COMMA,
    PY_TOKEN_DEL_COLON,
    PY_TOKEN_DEL_PERIOD,
    PY_TOKEN_DEL_SEMICOLON,
    PY_TOKEN_DEL_AT,                // 74
    PY_TOKEN_DEL_EQUAL,
    PY_TOKEN_DEL_PLUS_EQUAL,
    PY_TOKEN_DEL_MINUS_EQUAL,
    PY_TOKEN_DEL_STAR_EQUAL,
    PY_TOKEN_DEL_SLASH_EQUAL,
    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
    PY_TOKEN_DEL_PERCENT_EQUAL,
    PY_TOKEN_DEL_AMPERSAND_EQUAL,
    PY_TOKEN_DEL_PIPE_EQUAL,
    PY_TOKEN_DEL_CARET_EQUAL,       // 84
    PY_TOKEN_DEL_DBL_MORE_EQUAL,
    PY_TOKEN_DEL_DBL_LESS_EQUAL,
    PY_TOKEN_DEL_DBL_STAR_EQUAL,
    PY_TOKEN_DEL_MINUS_MORE,        // 88
} py_token_kind_t;
|
|
|
|
|
|
|
|
typedef struct _py_token_t {
|
2013-10-20 09:41:27 -04:00
|
|
|
const char *src_name; // name of source
|
|
|
|
uint src_line; // source line
|
|
|
|
uint src_column; // source column
|
2013-10-04 14:53:11 -04:00
|
|
|
|
|
|
|
py_token_kind_t kind; // kind of token
|
2013-10-20 09:41:27 -04:00
|
|
|
const char *str; // string of token (valid only while this token is current token)
|
2013-10-04 14:53:11 -04:00
|
|
|
uint len; // (byte) length of string of token
|
|
|
|
} py_token_t;
|
|
|
|
|
2013-10-20 09:41:27 -04:00
|
|
|
// the next-char function must return the next character in the stream
|
|
|
|
// it must return PY_LEXER_CHAR_EOF if end of stream
|
|
|
|
// it can be called again after returning PY_LEXER_CHAR_EOF, and in that case must return PY_LEXER_CHAR_EOF
|
|
|
|
#define PY_LEXER_CHAR_EOF (-1)
|
|
|
|
typedef unichar (*py_lexer_stream_next_char_t)(void*);
|
2013-10-20 12:42:00 -04:00
|
|
|
typedef void (*py_lexer_stream_close_t)(void*);
|
2013-10-20 09:41:27 -04:00
|
|
|
|
2013-10-04 14:53:11 -04:00
|
|
|
// Lexer handle. Only the struct tag is declared in this header, so the type
// is used through pointers here.
struct _py_lexer_t;
typedef struct _py_lexer_t py_lexer_t;
|
|
|
|
|
|
|
|
// Token reporting helpers. Their definitions are not in this header; the
// notes below are inferred from the signatures and should be confirmed
// against the implementation.

// Shows the given token (presumably a diagnostic dump of its fields).
void py_token_show(const py_token_t *tok);

// Shows the prefix of an error message for the given token (presumably its
// source name and position -- confirm).
void py_token_show_error_prefix(const py_token_t *tok);

// Shows an error message msg associated with the given token.
// NOTE(review): the meaning of the bool result is not visible here.
bool py_token_show_error(const py_token_t *tok, const char *msg);
|
|
|
|
|
2013-10-20 12:42:00 -04:00
|
|
|
py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close);
|
2013-10-04 14:53:11 -04:00
|
|
|
void py_lexer_free(py_lexer_t *lex);
|
|
|
|
void py_lexer_to_next(py_lexer_t *lex);
|
|
|
|
const py_token_t *py_lexer_cur(const py_lexer_t *lex);
|
|
|
|
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
|
|
|
|
/* unused
|
|
|
|
bool py_lexer_is_str(py_lexer_t *lex, const char *str);
|
|
|
|
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
|
|
|
|
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
|
|
|
|
*/
|
|
|
|
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
|
2013-10-09 10:09:52 -04:00
|
|
|
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);
|
2013-10-04 14:53:11 -04:00
|
|
|
|
|
|
|
#endif /* INCLUDED_LEXER_H */
|