ujson: do not eat trailing whitespace

ujson should only consume whitespace that precedes the JSON value.  This becomes apparent when you use the MicroPython stream protocol to read directly from input buffers.

When you attempt to read(1) on a UART (and possibly other protocols), the call blocks until either a byte arrives or the timeout expires.

Fixes:
- Waiting for a timeout after a correct and complete JSON document has already been read off the input.
- Raising an OSError after a correct and complete JSON document has been read off the input.
- Consuming more data from the input buffer than the JSON document semantically owns.
- Delaying the start of JSON parsing until the entire JSON body has been loaded into a potentially large, contiguous Python object.

Code you would write before:
```
line = board_busio_uart_port.readline()
json_dict = json.loads(line)
```
or reaching for fixed buffers and swapping them around in Python.

Code that did not work before that does now:
```
json_dict = json.load(board_busio_uart_port)
```

- This removes the need for intermediate copies of data when reading JSON from MicroPython stream protocol inputs.
- It also increases total application speed by parsing JSON concurrently with receiving on boards that read from UART via DMA.
- It simplifies code that users write while improving their apps.
This commit is contained in:
warriorofwire 2020-05-10 20:45:42 -07:00
parent 90bd931808
commit eb3d5fa453
3 changed files with 25 additions and 8 deletions

View File

@ -53,6 +53,10 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
} }
STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps); STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
#define JSON_DEBUG(...) (void)0
// #define JSON_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
// The function below implements a simple non-recursive JSON parser. // The function below implements a simple non-recursive JSON parser.
// //
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
@ -80,6 +84,7 @@ typedef struct _ujson_stream_t {
STATIC byte ujson_stream_next(ujson_stream_t *s) { STATIC byte ujson_stream_next(ujson_stream_t *s) {
mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode); mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode);
JSON_DEBUG(" usjon_stream_next err:%2d cur: %c \n", s->errcode, s->cur);
if (s->errcode != 0) { if (s->errcode != 0) {
mp_raise_OSError(s->errcode); mp_raise_OSError(s->errcode);
} }
@ -92,6 +97,7 @@ STATIC byte ujson_stream_next(ujson_stream_t *s) {
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) { STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ); const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
ujson_stream_t s = {stream_obj, stream_p->read, 0, 0}; ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
JSON_DEBUG("got JSON stream\n");
vstr_t vstr; vstr_t vstr;
vstr_init(&vstr, 8); vstr_init(&vstr, 8);
mp_obj_list_t stack; // we use a list as a simple stack for nested JSON mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
@ -101,6 +107,15 @@ STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
mp_obj_type_t *stack_top_type = NULL; mp_obj_type_t *stack_top_type = NULL;
mp_obj_t stack_key = MP_OBJ_NULL; mp_obj_t stack_key = MP_OBJ_NULL;
S_NEXT(s); S_NEXT(s);
// Eat _leading_ whitespace.
// If we eat trailing whitespace we will block for timeout on streams like UART that
// wait for requested data. Furthermore, it is an OSError to read(1) and incur
// a timeout on those APIs.
// For these reasons, we must only eat _leading_ whitespace.
while (unichar_isspace(S_CUR(s))) {
JSON_DEBUG("Eating leading whitespace");
S_NEXT(s);
}
for (;;) { for (;;) {
cont: cont:
if (S_END(s)) { if (S_END(s)) {
@ -262,14 +277,9 @@ STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
} }
} }
success: success:
// eat trailing whitespace // It is legal for a stream to have contents before and after JSON.
while (unichar_isspace(S_CUR(s))) { // If this parser has consumed a full successful JSON and its parse
S_NEXT(s); // stack is empty, the parse has succeeded.
}
if (!S_END(s)) {
// unexpected chars
goto fail;
}
if (stack_top == MP_OBJ_NULL || stack.len != 0) { if (stack_top == MP_OBJ_NULL || stack.len != 0) {
// not exactly 1 object // not exactly 1 object
goto fail; goto fail;

View File

@ -45,6 +45,9 @@
#include "samd/sercom.h" #include "samd/sercom.h"
#define UART_DEBUG(...) (void)0
// #define UART_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
// Do-nothing callback needed so that usart_async code will enable rx interrupts. // Do-nothing callback needed so that usart_async code will enable rx interrupts.
// See comment below re usart_async_register_callback() // See comment below re usart_async_register_callback()
static void usart_async_rxc_callback(const struct usart_async_descriptor *const descr) { static void usart_async_rxc_callback(const struct usart_async_descriptor *const descr) {

View File

@ -39,6 +39,9 @@
#include "py/stream.h" #include "py/stream.h"
#include "supervisor/shared/translate.h" #include "supervisor/shared/translate.h"
#define STREAM_DEBUG(...) (void)0
// #define STREAM_DEBUG(...) mp_printf(&mp_plat_print __VA_OPT__(,) __VA_ARGS__)
//| .. currentmodule:: busio //| .. currentmodule:: busio
//| //|
@ -219,6 +222,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(busio_uart___exit___obj, 4, 4, busio_
// These three methods are used by the shared stream methods. // These three methods are used by the shared stream methods.
STATIC mp_uint_t busio_uart_read(mp_obj_t self_in, void *buf_in, mp_uint_t size, int *errcode) { STATIC mp_uint_t busio_uart_read(mp_obj_t self_in, void *buf_in, mp_uint_t size, int *errcode) {
STREAM_DEBUG("busio_uart_read stream %d\n", size);
busio_uart_obj_t *self = MP_OBJ_TO_PTR(self_in); busio_uart_obj_t *self = MP_OBJ_TO_PTR(self_in);
check_for_deinit(self); check_for_deinit(self);
byte *buf = buf_in; byte *buf = buf_in;