lexer: Convert type (u)int to mp_(u)int_t.
This commit is contained in:
parent
40f3c02682
commit
54eb4e723e
55
py/lexer.c
55
py/lexer.c
@ -50,25 +50,25 @@ struct _mp_lexer_t {
|
|||||||
|
|
||||||
unichar chr0, chr1, chr2; // current cached characters from source
|
unichar chr0, chr1, chr2; // current cached characters from source
|
||||||
|
|
||||||
uint line; // source line
|
mp_uint_t line; // source line
|
||||||
uint column; // source column
|
mp_uint_t column; // source column
|
||||||
|
|
||||||
int emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
|
mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
|
||||||
int nested_bracket_level; // >0 when there are nested brackets over multiple lines
|
mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
|
||||||
|
|
||||||
uint alloc_indent_level;
|
mp_uint_t alloc_indent_level;
|
||||||
uint num_indent_level;
|
mp_uint_t num_indent_level;
|
||||||
uint16_t *indent_level;
|
uint16_t *indent_level;
|
||||||
|
|
||||||
vstr_t vstr;
|
vstr_t vstr;
|
||||||
mp_token_t tok_cur;
|
mp_token_t tok_cur;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint mp_optimise_value;
|
mp_uint_t mp_optimise_value;
|
||||||
|
|
||||||
// TODO replace with a call to a standard function
|
// TODO replace with a call to a standard function
|
||||||
bool str_strn_equal(const char *str, const char *strn, int len) {
|
bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
|
||||||
uint i = 0;
|
mp_uint_t i = 0;
|
||||||
|
|
||||||
while (i < len && *str == *strn) {
|
while (i < len && *str == *strn) {
|
||||||
++i;
|
++i;
|
||||||
@ -81,7 +81,7 @@ bool str_strn_equal(const char *str, const char *strn, int len) {
|
|||||||
|
|
||||||
#ifdef MICROPY_DEBUG_PRINTERS
|
#ifdef MICROPY_DEBUG_PRINTERS
|
||||||
void mp_token_show(const mp_token_t *tok) {
|
void mp_token_show(const mp_token_t *tok) {
|
||||||
printf("(%d:%d) kind:%d str:%p len:%d", tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
|
printf("(" UINT_FMT ":" UINT_FMT ") kind:%u str:%p len:" UINT_FMT, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
|
||||||
if (tok->str != NULL && tok->len > 0) {
|
if (tok->str != NULL && tok->len > 0) {
|
||||||
const byte *i = (const byte *)tok->str;
|
const byte *i = (const byte *)tok->str;
|
||||||
const byte *j = (const byte *)i + tok->len;
|
const byte *j = (const byte *)i + tok->len;
|
||||||
@ -175,7 +175,7 @@ STATIC void next_char(mp_lexer_t *lex) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int advance = 1;
|
mp_uint_t advance = 1;
|
||||||
|
|
||||||
if (lex->chr0 == '\n') {
|
if (lex->chr0 == '\n') {
|
||||||
// LF is a new line
|
// LF is a new line
|
||||||
@ -210,7 +210,7 @@ STATIC void next_char(mp_lexer_t *lex) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void indent_push(mp_lexer_t *lex, uint indent) {
|
void indent_push(mp_lexer_t *lex, mp_uint_t indent) {
|
||||||
if (lex->num_indent_level >= lex->alloc_indent_level) {
|
if (lex->num_indent_level >= lex->alloc_indent_level) {
|
||||||
// TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR
|
// TODO use m_renew_maybe and somehow indicate an error if it fails... probably by using MP_TOKEN_MEMORY_ERROR
|
||||||
lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
|
lex->indent_level = m_renew(uint16_t, lex->indent_level, lex->alloc_indent_level, lex->alloc_indent_level + MICROPY_ALLOC_LEXEL_INDENT_INC);
|
||||||
@ -219,7 +219,7 @@ void indent_push(mp_lexer_t *lex, uint indent) {
|
|||||||
lex->indent_level[lex->num_indent_level++] = indent;
|
lex->indent_level[lex->num_indent_level++] = indent;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint indent_top(mp_lexer_t *lex) {
|
mp_uint_t indent_top(mp_lexer_t *lex) {
|
||||||
return lex->indent_level[lex->num_indent_level - 1];
|
return lex->indent_level[lex->num_indent_level - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -308,9 +308,9 @@ STATIC const char *tok_kw[] = {
|
|||||||
"__debug__",
|
"__debug__",
|
||||||
};
|
};
|
||||||
|
|
||||||
STATIC int hex_digit(unichar c) {
|
STATIC mp_uint_t hex_digit(unichar c) {
|
||||||
// c is assumed to be hex digit
|
// c is assumed to be hex digit
|
||||||
int n = c - '0';
|
mp_uint_t n = c - '0';
|
||||||
if (n > 9) {
|
if (n > 9) {
|
||||||
n &= ~('a' - 'A');
|
n &= ~('a' - 'A');
|
||||||
n -= ('A' - ('9' + 1));
|
n -= ('A' - ('9' + 1));
|
||||||
@ -320,8 +320,9 @@ STATIC int hex_digit(unichar c) {
|
|||||||
|
|
||||||
// This is called with CUR_CHAR() before first hex digit, and should return with
|
// This is called with CUR_CHAR() before first hex digit, and should return with
|
||||||
// it pointing to last hex digit
|
// it pointing to last hex digit
|
||||||
STATIC bool get_hex(mp_lexer_t *lex, int num_digits, uint *result) {
|
// num_digits must be greater than zero
|
||||||
uint num = 0;
|
STATIC bool get_hex(mp_lexer_t *lex, mp_uint_t num_digits, mp_uint_t *result) {
|
||||||
|
mp_uint_t num = 0;
|
||||||
while (num_digits-- != 0) {
|
while (num_digits-- != 0) {
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
unichar c = CUR_CHAR(lex);
|
unichar c = CUR_CHAR(lex);
|
||||||
@ -394,7 +395,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
} else if (had_physical_newline && lex->nested_bracket_level == 0) {
|
} else if (had_physical_newline && lex->nested_bracket_level == 0) {
|
||||||
tok->kind = MP_TOKEN_NEWLINE;
|
tok->kind = MP_TOKEN_NEWLINE;
|
||||||
|
|
||||||
uint num_spaces = lex->column - 1;
|
mp_uint_t num_spaces = lex->column - 1;
|
||||||
lex->emit_dent = 0;
|
lex->emit_dent = 0;
|
||||||
if (num_spaces == indent_top(lex)) {
|
if (num_spaces == indent_top(lex)) {
|
||||||
} else if (num_spaces > indent_top(lex)) {
|
} else if (num_spaces > indent_top(lex)) {
|
||||||
@ -463,7 +464,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
next_char(lex);
|
next_char(lex);
|
||||||
|
|
||||||
// work out if it's a single or triple quoted literal
|
// work out if it's a single or triple quoted literal
|
||||||
int num_quotes;
|
mp_uint_t num_quotes;
|
||||||
if (is_char_and(lex, quote_char, quote_char)) {
|
if (is_char_and(lex, quote_char, quote_char)) {
|
||||||
// triple quotes
|
// triple quotes
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
@ -475,7 +476,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parse the literal
|
// parse the literal
|
||||||
int n_closing = 0;
|
mp_uint_t n_closing = 0;
|
||||||
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
|
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
|
||||||
if (is_char(lex, quote_char)) {
|
if (is_char(lex, quote_char)) {
|
||||||
n_closing += 1;
|
n_closing += 1;
|
||||||
@ -512,7 +513,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
// Otherwise fall through.
|
// Otherwise fall through.
|
||||||
case 'x':
|
case 'x':
|
||||||
{
|
{
|
||||||
uint num = 0;
|
mp_uint_t num = 0;
|
||||||
if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 4 : 8), &num)) {
|
if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 4 : 8), &num)) {
|
||||||
// TODO error message
|
// TODO error message
|
||||||
assert(0);
|
assert(0);
|
||||||
@ -531,8 +532,8 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
default:
|
default:
|
||||||
if (c >= '0' && c <= '7') {
|
if (c >= '0' && c <= '7') {
|
||||||
// Octal sequence, 1-3 chars
|
// Octal sequence, 1-3 chars
|
||||||
int digits = 3;
|
mp_uint_t digits = 3;
|
||||||
int num = c - '0';
|
mp_uint_t num = c - '0';
|
||||||
while (is_following_odigit(lex) && --digits != 0) {
|
while (is_following_odigit(lex) && --digits != 0) {
|
||||||
next_char(lex);
|
next_char(lex);
|
||||||
num = num * 8 + (CUR_CHAR(lex) - '0');
|
num = num * 8 + (CUR_CHAR(lex) - '0');
|
||||||
@ -627,7 +628,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
// search for encoded delimiter or operator
|
// search for encoded delimiter or operator
|
||||||
|
|
||||||
const char *t = tok_enc;
|
const char *t = tok_enc;
|
||||||
uint tok_enc_index = 0;
|
mp_uint_t tok_enc_index = 0;
|
||||||
for (; *t != 0 && !is_char(lex, *t); t += 1) {
|
for (; *t != 0 && !is_char(lex, *t); t += 1) {
|
||||||
if (*t == 'e' || *t == 'c') {
|
if (*t == 'e' || *t == 'c') {
|
||||||
t += 1;
|
t += 1;
|
||||||
@ -649,7 +650,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
|
|
||||||
// get the maximum characters for a valid token
|
// get the maximum characters for a valid token
|
||||||
t += 1;
|
t += 1;
|
||||||
uint t_index = tok_enc_index;
|
mp_uint_t t_index = tok_enc_index;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
for (; *t == 'e'; t += 1) {
|
for (; *t == 'e'; t += 1) {
|
||||||
t += 1;
|
t += 1;
|
||||||
@ -712,8 +713,8 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
|
|||||||
// the parser gives a syntax error on, eg, x.__debug__. Otherwise, we
|
// the parser gives a syntax error on, eg, x.__debug__. Otherwise, we
|
||||||
// need to check for this special token in many places in the compiler.
|
// need to check for this special token in many places in the compiler.
|
||||||
// TODO improve speed of these string comparisons
|
// TODO improve speed of these string comparisons
|
||||||
//for (int i = 0; tok_kw[i] != NULL; i++) {
|
//for (mp_int_t i = 0; tok_kw[i] != NULL; i++) {
|
||||||
for (int i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
|
for (mp_int_t i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
|
||||||
if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
|
if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
|
||||||
if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
|
if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
|
||||||
// tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
|
// tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
|
||||||
|
10
py/lexer.h
10
py/lexer.h
@ -131,12 +131,12 @@ typedef enum _mp_token_kind_t {
|
|||||||
} mp_token_kind_t;
|
} mp_token_kind_t;
|
||||||
|
|
||||||
typedef struct _mp_token_t {
|
typedef struct _mp_token_t {
|
||||||
uint src_line; // source line
|
mp_uint_t src_line; // source line
|
||||||
uint src_column; // source column
|
mp_uint_t src_column; // source column
|
||||||
|
|
||||||
mp_token_kind_t kind; // kind of token
|
mp_token_kind_t kind; // kind of token
|
||||||
const char *str; // string of token (valid only while this token is current token)
|
const char *str; // string of token (valid only while this token is current token)
|
||||||
uint len; // (byte) length of string of token
|
mp_uint_t len; // (byte) length of string of token
|
||||||
} mp_token_t;
|
} mp_token_t;
|
||||||
|
|
||||||
// the next-char function must return the next character in the stream
|
// the next-char function must return the next character in the stream
|
||||||
@ -151,7 +151,7 @@ typedef struct _mp_lexer_t mp_lexer_t;
|
|||||||
void mp_token_show(const mp_token_t *tok);
|
void mp_token_show(const mp_token_t *tok);
|
||||||
|
|
||||||
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
|
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
|
||||||
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len);
|
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
|
||||||
|
|
||||||
void mp_lexer_free(mp_lexer_t *lex);
|
void mp_lexer_free(mp_lexer_t *lex);
|
||||||
qstr mp_lexer_source_name(mp_lexer_t *lex);
|
qstr mp_lexer_source_name(mp_lexer_t *lex);
|
||||||
@ -177,4 +177,4 @@ typedef enum {
|
|||||||
mp_import_stat_t mp_import_stat(const char *path);
|
mp_import_stat_t mp_import_stat(const char *path);
|
||||||
mp_lexer_t *mp_lexer_new_from_file(const char *filename);
|
mp_lexer_t *mp_lexer_new_from_file(const char *filename);
|
||||||
|
|
||||||
extern uint mp_optimise_value;
|
extern mp_uint_t mp_optimise_value;
|
||||||
|
@ -30,7 +30,7 @@
|
|||||||
#include "lexer.h"
|
#include "lexer.h"
|
||||||
|
|
||||||
typedef struct _mp_lexer_str_buf_t {
|
typedef struct _mp_lexer_str_buf_t {
|
||||||
uint free_len; // if > 0, src_beg will be freed when done by: m_free(src_beg, free_len)
|
mp_uint_t free_len; // if > 0, src_beg will be freed when done by: m_free(src_beg, free_len)
|
||||||
const char *src_beg; // beginning of source
|
const char *src_beg; // beginning of source
|
||||||
const char *src_cur; // current location in source
|
const char *src_cur; // current location in source
|
||||||
const char *src_end; // end (exclusive) of source
|
const char *src_end; // end (exclusive) of source
|
||||||
@ -51,7 +51,7 @@ STATIC void str_buf_free(mp_lexer_str_buf_t *sb) {
|
|||||||
m_del_obj(mp_lexer_str_buf_t, sb);
|
m_del_obj(mp_lexer_str_buf_t, sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len) {
|
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len) {
|
||||||
mp_lexer_str_buf_t *sb = m_new_obj(mp_lexer_str_buf_t);
|
mp_lexer_str_buf_t *sb = m_new_obj(mp_lexer_str_buf_t);
|
||||||
sb->free_len = free_len;
|
sb->free_len = free_len;
|
||||||
sb->src_beg = str;
|
sb->src_beg = str;
|
||||||
|
@ -43,7 +43,7 @@
|
|||||||
#define STR_INVALID_SYNTAX "invalid syntax"
|
#define STR_INVALID_SYNTAX "invalid syntax"
|
||||||
|
|
||||||
void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_kind) {
|
void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_kind) {
|
||||||
printf(" File \"%s\", line %d, column %d\n", qstr_str(mp_lexer_source_name(lex)), mp_lexer_cur(lex)->src_line, mp_lexer_cur(lex)->src_column);
|
printf(" File \"%s\", line " UINT_FMT ", column " UINT_FMT "\n", qstr_str(mp_lexer_source_name(lex)), mp_lexer_cur(lex)->src_line, mp_lexer_cur(lex)->src_column);
|
||||||
switch (parse_error_kind) {
|
switch (parse_error_kind) {
|
||||||
case MP_PARSE_ERROR_MEMORY:
|
case MP_PARSE_ERROR_MEMORY:
|
||||||
printf("MemoryError: %s\n", STR_MEMORY);
|
printf("MemoryError: %s\n", STR_MEMORY);
|
||||||
|
@ -16,6 +16,6 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return mp_lexer_new_from_str_len(qstr_from_str(filename), (const char *)data, (uint)len, 0);
|
return mp_lexer_new_from_str_len(qstr_from_str(filename), (const char *)data, (mp_uint_t)len, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user