/* lexer.h -- simple tokeniser for Python implementation
 */

#ifndef INCLUDED_LEXER_H
#define INCLUDED_LEXER_H

/* Strings are described by an explicit (byte) length instead of relying on
 * null termination.  Token strings are the same: UTF-8 with a (byte) length.
 */

// Kinds of token.
//
// Enumerators are numbered sequentially from 0.  The trailing numbers are the
// resulting values and are informational only: inserting or reordering an
// entry shifts the value of every entry after it.
typedef enum _py_token_kind_t {
    // --- end marker ---
    PY_TOKEN_END = 0,               // 0

    // --- invalid / malformed input kinds ---
    PY_TOKEN_INVALID,               // 1
    PY_TOKEN_DEDENT_MISMATCH,
    PY_TOKEN_LONELY_STRING_OPEN,

    // --- line structure ---
    PY_TOKEN_NEWLINE,               // 4
    PY_TOKEN_INDENT,                // 5
    PY_TOKEN_DEDENT,                // 6

    // --- names and literals ---
    PY_TOKEN_NAME,                  // 7
    PY_TOKEN_NUMBER,
    PY_TOKEN_STRING,
    PY_TOKEN_BYTES,

    PY_TOKEN_ELLIPSES,              // 11

    // --- keywords (PY_TOKEN_KW_*) ---
    PY_TOKEN_KW_FALSE,              // 12
    PY_TOKEN_KW_NONE,
    PY_TOKEN_KW_TRUE,
    PY_TOKEN_KW_AND,
    PY_TOKEN_KW_AS,
    PY_TOKEN_KW_ASSERT,
    PY_TOKEN_KW_BREAK,
    PY_TOKEN_KW_CLASS,
    PY_TOKEN_KW_CONTINUE,
    PY_TOKEN_KW_DEF,                // 21
    PY_TOKEN_KW_DEL,
    PY_TOKEN_KW_ELIF,
    PY_TOKEN_KW_ELSE,
    PY_TOKEN_KW_EXCEPT,
    PY_TOKEN_KW_FINALLY,
    PY_TOKEN_KW_FOR,
    PY_TOKEN_KW_FROM,
    PY_TOKEN_KW_GLOBAL,
    PY_TOKEN_KW_IF,
    PY_TOKEN_KW_IMPORT,             // 31
    PY_TOKEN_KW_IN,
    PY_TOKEN_KW_IS,
    PY_TOKEN_KW_LAMBDA,
    PY_TOKEN_KW_NONLOCAL,
    PY_TOKEN_KW_NOT,
    PY_TOKEN_KW_OR,
    PY_TOKEN_KW_PASS,
    PY_TOKEN_KW_RAISE,
    PY_TOKEN_KW_RETURN,
    PY_TOKEN_KW_TRY,                // 41
    PY_TOKEN_KW_WHILE,
    PY_TOKEN_KW_WITH,
    PY_TOKEN_KW_YIELD,              // 44

    // --- operators (PY_TOKEN_OP_*) ---
    PY_TOKEN_OP_PLUS,               // 45
    PY_TOKEN_OP_MINUS,
    PY_TOKEN_OP_STAR,
    PY_TOKEN_OP_DBL_STAR,
    PY_TOKEN_OP_SLASH,
    PY_TOKEN_OP_DBL_SLASH,
    PY_TOKEN_OP_PERCENT,
    PY_TOKEN_OP_LESS,
    PY_TOKEN_OP_DBL_LESS,
    PY_TOKEN_OP_MORE,
    PY_TOKEN_OP_DBL_MORE,           // 55
    PY_TOKEN_OP_AMPERSAND,
    PY_TOKEN_OP_PIPE,
    PY_TOKEN_OP_CARET,
    PY_TOKEN_OP_TILDE,
    PY_TOKEN_OP_LESS_EQUAL,
    PY_TOKEN_OP_MORE_EQUAL,
    PY_TOKEN_OP_DBL_EQUAL,
    PY_TOKEN_OP_NOT_EQUAL,          // 63

    // --- delimiters (PY_TOKEN_DEL_*) ---
    PY_TOKEN_DEL_PAREN_OPEN,        // 64
    PY_TOKEN_DEL_PAREN_CLOSE,
    PY_TOKEN_DEL_BRACKET_OPEN,
    PY_TOKEN_DEL_BRACKET_CLOSE,
    PY_TOKEN_DEL_BRACE_OPEN,
    PY_TOKEN_DEL_BRACE_CLOSE,
    PY_TOKEN_DEL_COMMA,
    PY_TOKEN_DEL_COLON,
    PY_TOKEN_DEL_PERIOD,
    PY_TOKEN_DEL_SEMICOLON,
    PY_TOKEN_DEL_AT,                // 74
    PY_TOKEN_DEL_EQUAL,
    PY_TOKEN_DEL_PLUS_EQUAL,
    PY_TOKEN_DEL_MINUS_EQUAL,
    PY_TOKEN_DEL_STAR_EQUAL,
    PY_TOKEN_DEL_SLASH_EQUAL,
    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
    PY_TOKEN_DEL_PERCENT_EQUAL,
    PY_TOKEN_DEL_AMPERSAND_EQUAL,
    PY_TOKEN_DEL_PIPE_EQUAL,
    PY_TOKEN_DEL_CARET_EQUAL,       // 84
    PY_TOKEN_DEL_DBL_MORE_EQUAL,
    PY_TOKEN_DEL_DBL_LESS_EQUAL,
    PY_TOKEN_DEL_DBL_STAR_EQUAL,
    PY_TOKEN_DEL_MINUS_MORE,        // 88
} py_token_kind_t;

// A single token: its kind, its text, and the source position it is
// attributed to.  The text is given as a pointer plus a byte length.
// NOTE(review): whether src_line/src_column are 0- or 1-based is not visible
// in this header -- confirm against the lexer implementation.
typedef struct _py_token_t {
    const char *src_name;       // name of the source the token belongs to
    uint src_line;              // source line of the token
    uint src_column;            // source column of the token

    py_token_kind_t kind;       // kind of token (see py_token_kind_t)
    const char *str;            // text of the token; valid only while this token is the current token
    uint len;                   // length of str in bytes
} py_token_t;

// Character-stream callbacks handed to py_lexer_new.
//
// The next-char function must return the next character in the stream.
// It must return PY_LEXER_CHAR_EOF at end of stream, and if it is called again
// after returning PY_LEXER_CHAR_EOF it must return PY_LEXER_CHAR_EOF again.
//
// Both callbacks receive a void* argument; presumably this is the stream_data
// pointer given to py_lexer_new -- confirm against the implementation.
// NOTE(review): unichar is declared elsewhere; if it is an unsigned type, the
// (-1) sentinel is converted to that type on return/comparison -- confirm that
// callers compare against PY_LEXER_CHAR_EOF consistently.
#define PY_LEXER_CHAR_EOF (-1)
typedef unichar (*py_lexer_stream_next_char_t)(void*);
typedef void (*py_lexer_stream_close_t)(void*);

// Lexer handle.  struct _py_lexer_t is only forward-declared in this header,
// so code using the header refers to a lexer through a py_lexer_t pointer.
typedef struct _py_lexer_t py_lexer_t;

// Token reporting helpers.  Each takes the token read-only (const pointer).
// NOTE(review): the implementations are not visible from this header; the
// descriptions below are inferred from the names and signatures -- confirm.

// Presumably displays the given token (for debugging/inspection).
void py_token_show(const py_token_t *tok);
// Presumably emits the leading part of an error message for the given token.
void py_token_show_error_prefix(const py_token_t *tok);
// Presumably reports error message msg in the context of the given token.
// The meaning of the bool result is defined by the implementation.
bool py_token_show_error(const py_token_t *tok, const char *msg);

// Lexer API.
// NOTE(review): the implementations are not visible from this header; the
// descriptions below are inferred from the names and signatures -- confirm.

// Creates a lexer for the source named src_name.  Characters are obtained via
// stream_next_char (see the contract documented at PY_LEXER_CHAR_EOF);
// stream_close and stream_data are supplied alongside it, presumably so the
// lexer can release the stream when it is done with it.
py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close);
// Releases a lexer obtained from py_lexer_new.
void py_lexer_free(py_lexer_t *lex);
// Presumably advances the lexer so that the next token becomes the current one.
void py_lexer_to_next(py_lexer_t *lex);
// Returns a read-only pointer to a token; by its name, the lexer's current
// token.  Note py_token_t.str is only valid while that token is current.
const py_token_t *py_lexer_cur(const py_lexer_t *lex);
// Presumably tests whether the current token is of the given kind.
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
/* unused
bool py_lexer_is_str(py_lexer_t *lex, const char *str);
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
*/
// Error reporting through the lexer; msg is the message text.  The meaning of
// the bool results is defined by the implementation.
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);

#endif /* INCLUDED_LEXER_H */