py/lexer: Make lexer use an mp_reader as its source.
This commit is contained in:
parent
66d955c218
commit
5bdf1650de
@ -35,10 +35,11 @@ typedef struct _mp_lexer_str32_buf_t {
|
||||
uint8_t byte_off;
|
||||
} mp_lexer_str32_buf_t;
|
||||
|
||||
STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
|
||||
STATIC mp_uint_t str32_buf_next_byte(void *sb_in) {
|
||||
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
|
||||
byte c = sb->val & 0xff;
|
||||
if (c == 0) {
|
||||
return MP_LEXER_EOF;
|
||||
return MP_READER_EOF;
|
||||
}
|
||||
|
||||
if (++sb->byte_off > 3) {
|
||||
@ -51,7 +52,8 @@ STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
|
||||
return c;
|
||||
}
|
||||
|
||||
STATIC void str32_buf_free(mp_lexer_str32_buf_t *sb) {
|
||||
STATIC void str32_buf_free(void *sb_in) {
|
||||
mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
|
||||
m_del_obj(mp_lexer_str32_buf_t, sb);
|
||||
}
|
||||
|
||||
@ -63,7 +65,8 @@ mp_lexer_t *mp_lexer_new_from_str32(qstr src_name, const char *str, mp_uint_t le
|
||||
sb->byte_off = (uint32_t)str & 3;
|
||||
sb->src_cur = (uint32_t*)(str - sb->byte_off);
|
||||
sb->val = *sb->src_cur++ >> sb->byte_off * 8;
|
||||
return mp_lexer_new(src_name, sb, (mp_lexer_stream_next_byte_t)str32_buf_next_byte, (mp_lexer_stream_close_t)str32_buf_free);
|
||||
mp_reader_t reader = {sb, str32_buf_next_byte, str32_buf_free};
|
||||
return mp_lexer_new(src_name, reader);
|
||||
}
|
||||
|
||||
#endif // MICROPY_ENABLE_COMPILER
|
||||
|
31
py/lexer.c
31
py/lexer.c
@ -52,6 +52,7 @@ STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
|
||||
return i == len && *str == 0;
|
||||
}
|
||||
|
||||
#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
|
||||
#define CUR_CHAR(lex) ((lex)->chr0)
|
||||
|
||||
STATIC bool is_end(mp_lexer_t *lex) {
|
||||
@ -145,7 +146,7 @@ STATIC void next_char(mp_lexer_t *lex) {
|
||||
|
||||
lex->chr0 = lex->chr1;
|
||||
lex->chr1 = lex->chr2;
|
||||
lex->chr2 = lex->stream_next_byte(lex->stream_data);
|
||||
lex->chr2 = lex->reader.readbyte(lex->reader.data);
|
||||
|
||||
if (lex->chr0 == '\r') {
|
||||
// CR is a new line, converted to LF
|
||||
@ -153,7 +154,7 @@ STATIC void next_char(mp_lexer_t *lex) {
|
||||
if (lex->chr1 == '\n') {
|
||||
// CR LF is a single new line
|
||||
lex->chr1 = lex->chr2;
|
||||
lex->chr2 = lex->stream_next_byte(lex->stream_data);
|
||||
lex->chr2 = lex->reader.readbyte(lex->reader.data);
|
||||
}
|
||||
}
|
||||
|
||||
@ -689,21 +690,17 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
|
||||
}
|
||||
}
|
||||
|
||||
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close) {
|
||||
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
|
||||
mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t);
|
||||
|
||||
// check for memory allocation error
|
||||
if (lex == NULL) {
|
||||
if (stream_close) {
|
||||
stream_close(stream_data);
|
||||
}
|
||||
reader.close(reader.data);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lex->source_name = src_name;
|
||||
lex->stream_data = stream_data;
|
||||
lex->stream_next_byte = stream_next_byte;
|
||||
lex->stream_close = stream_close;
|
||||
lex->reader = reader;
|
||||
lex->line = 1;
|
||||
lex->column = 1;
|
||||
lex->emit_dent = 0;
|
||||
@ -724,9 +721,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
|
||||
lex->indent_level[0] = 0;
|
||||
|
||||
// preload characters
|
||||
lex->chr0 = stream_next_byte(stream_data);
|
||||
lex->chr1 = stream_next_byte(stream_data);
|
||||
lex->chr2 = stream_next_byte(stream_data);
|
||||
lex->chr0 = reader.readbyte(reader.data);
|
||||
lex->chr1 = reader.readbyte(reader.data);
|
||||
lex->chr2 = reader.readbyte(reader.data);
|
||||
|
||||
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
|
||||
if (lex->chr0 == MP_LEXER_EOF) {
|
||||
@ -756,7 +753,7 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
|
||||
if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) {
|
||||
return NULL;
|
||||
}
|
||||
return mp_lexer_new(src_name, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
|
||||
return mp_lexer_new(src_name, reader);
|
||||
}
|
||||
|
||||
#if MICROPY_READER_POSIX || MICROPY_READER_FATFS
|
||||
@ -767,7 +764,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
|
||||
if (ret != 0) {
|
||||
return NULL;
|
||||
}
|
||||
return mp_lexer_new(qstr_from_str(filename), reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
|
||||
return mp_lexer_new(qstr_from_str(filename), reader);
|
||||
}
|
||||
|
||||
#if MICROPY_HELPER_LEXER_UNIX
|
||||
@ -778,7 +775,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
|
||||
if (ret != 0) {
|
||||
return NULL;
|
||||
}
|
||||
return mp_lexer_new(filename, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
|
||||
return mp_lexer_new(filename, reader);
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -787,9 +784,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
|
||||
|
||||
void mp_lexer_free(mp_lexer_t *lex) {
|
||||
if (lex) {
|
||||
if (lex->stream_close) {
|
||||
lex->stream_close(lex->stream_data);
|
||||
}
|
||||
lex->reader.close(lex->reader.data);
|
||||
vstr_clear(&lex->vstr);
|
||||
m_del(uint16_t, lex->indent_level, lex->alloc_indent_level);
|
||||
m_del_obj(mp_lexer_t, lex);
|
||||
|
15
py/lexer.h
15
py/lexer.h
@ -30,6 +30,7 @@
|
||||
|
||||
#include "py/mpconfig.h"
|
||||
#include "py/qstr.h"
|
||||
#include "py/reader.h"
|
||||
|
||||
/* lexer.h -- simple tokeniser for Micro Python
|
||||
*
|
||||
@ -142,21 +143,11 @@ typedef enum _mp_token_kind_t {
|
||||
MP_TOKEN_DEL_MINUS_MORE,
|
||||
} mp_token_kind_t;
|
||||
|
||||
// the next-byte function must return the next byte in the stream
|
||||
// it must return MP_LEXER_EOF if end of stream
|
||||
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
|
||||
#define MP_LEXER_EOF ((unichar)(-1))
|
||||
|
||||
typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
|
||||
typedef void (*mp_lexer_stream_close_t)(void*);
|
||||
|
||||
// this data structure is exposed for efficiency
|
||||
// public members are: source_name, tok_line, tok_column, tok_kind, vstr
|
||||
typedef struct _mp_lexer_t {
|
||||
qstr source_name; // name of source
|
||||
void *stream_data; // data for stream
|
||||
mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
|
||||
mp_lexer_stream_close_t stream_close; // stream callback to free
|
||||
mp_reader_t reader; // stream source
|
||||
|
||||
unichar chr0, chr1, chr2; // current cached characters from source
|
||||
|
||||
@ -176,7 +167,7 @@ typedef struct _mp_lexer_t {
|
||||
vstr_t vstr; // token data
|
||||
} mp_lexer_t;
|
||||
|
||||
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
|
||||
mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
|
||||
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
|
||||
|
||||
void mp_lexer_free(mp_lexer_t *lex);
|
||||
|
Loading…
Reference in New Issue
Block a user