From 6dc03ae3ced496c696058be739e0ba6c491c6a62 Mon Sep 17 00:00:00 2001 From: Dan Halbert Date: Mon, 3 Oct 2022 20:46:19 -0400 Subject: [PATCH] fix some dotenv parsing --- shared-bindings/dotenv/__init__.c | 13 ++- shared-module/dotenv/__init__.c | 115 ++++++++++++--------- tests/circuitpython-manual/dotenv_test.env | 32 ++++++ tests/circuitpython-manual/dotenv_test.py | 23 +++++ 4 files changed, 132 insertions(+), 51 deletions(-) create mode 100644 tests/circuitpython-manual/dotenv_test.env create mode 100644 tests/circuitpython-manual/dotenv_test.py diff --git a/shared-bindings/dotenv/__init__.c b/shared-bindings/dotenv/__init__.c index 0aed49cefb..193338c7c2 100644 --- a/shared-bindings/dotenv/__init__.c +++ b/shared-bindings/dotenv/__init__.c @@ -40,9 +40,13 @@ //| A subset of the CPython `dotenv library `_. It does //| not support variables or double quotes. //| -//| The simplest way to define keys and values is to put them in single quotes. \ and ' are -//| escaped by \ in single quotes. Newlines can occur in quotes for multiline values. Comments -//| start with # and apply for the rest of the line. +//| Keys and values may be put in single quotes. +//| ``\`` and ``'`` are escaped by ``\`` in single quotes. Newlines can occur in quotes for multiline values. +//| Comments start with ``#`` and apply for the rest of the line. +//| A ``#`` immediately following an ``=`` is part of the value, not the start of a comment, +//| and a ``#`` embedded in a value without whitespace will be part of that value. +//| This corresponds to how assignments and comments work in most Unix shells. +//| //| //| File format example: //| @@ -58,6 +62,9 @@ //| multiline = 'hello //| world //| how are you?' +//| # The #'s below will be included in the value. They do not start a comment. +//| key6=#value +//| key7=abc#def //| //| """ //| diff --git a/shared-module/dotenv/__init__.c b/shared-module/dotenv/__init__.c index 1837cef2f5..505eb2a91f 100644 --- a/shared-module/dotenv/__init__.c +++ b/shared-module/dotenv/__init__.c @@ -35,72 +35,90 @@ #include "py/objstr.h" #include "supervisor/filesystem.h" -STATIC uint8_t consume_spaces(FIL *active_file) { - uint8_t character = ' '; - UINT quantity_read = 1; - while (unichar_isspace(character) && quantity_read > 0) { - f_read(active_file, &character, 1, &quantity_read); - } +// Return 0 if there is no next character (EOF). +STATIC uint8_t get_next_character(FIL *active_file) { + uint8_t character = 0; + UINT quantity_read; + // If there's an error or quantity_read is 0, character will remain 0. + f_read(active_file, &character, 1, &quantity_read); + return character; +} + +// Discard whitespace, except for newlines, returning the next character after the whitespace. +// Return 0 if there is no next character (EOF). +STATIC uint8_t consume_whitespace(FIL *active_file) { + uint8_t character; + do { + character = get_next_character(active_file); + } while (character != '\n' && character != 0 && unichar_isspace(character)); return character; } // Starting at the start of a new line, determines if the key matches the given -// key. File pointer is left after the = after the key. +// key. File pointer is set to be just before the = after the key. STATIC bool key_matches(FIL *active_file, const char *key) { - uint8_t character = ' '; - UINT quantity_read = 1; - character = consume_spaces(active_file); + uint8_t character; + character = consume_whitespace(active_file); + if (character == 0) { + return false; + } bool quoted = false; if (character == '\'') { + // Beginning of single-quoted string. quoted = true; - f_read(active_file, &character, 1, &quantity_read); + character = get_next_character(active_file); } size_t key_pos = 0; bool escaped = false; bool matches = true; size_t key_len = strlen(key); - while (quantity_read > 0) { + while (character != 0) { if (character == '\\' && !escaped && quoted) { escaped = true; } else if (!escaped && quoted && character == '\'') { quoted = false; - // Move past the quoted before breaking so we can check the validity of data past it. - f_read(active_file, &character, 1, &quantity_read); + // End of quoted key. Skip over the ending quote. + character = get_next_character(active_file); break; - } else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#')) { + } else if (!quoted && (unichar_isspace(character) || character == '=' || character == '\n' || character == '#' || character == 0)) { + // End of unquoted key. break; } else { - matches = matches && key[key_pos] == character; - escaped = false; - key_pos++; + // Still on tentative key; see if it matches the next supplied key character, + // but don't run off the end of the supplied key. + if (key_pos < key_len) { + matches = matches && key[key_pos] == character; + escaped = false; + key_pos++; + } else { + // Key on line is too long. + matches = false; + } } - - f_read(active_file, &character, 1, &quantity_read); + character = get_next_character(active_file); } - if (unichar_isspace(character)) { - character = consume_spaces(active_file); - } - if (character == '=' || character == '\n' || character == '#') { - // Rewind one so the value can find it. + if (character == '=' || character == '\n' || character == '#' || character == 0) { + // Rewind one so the value, if any, can be found. f_lseek(active_file, f_tell(active_file) - 1); } else { // We're followed by something else that is invalid syntax. matches = false; } + return matches && key_pos == key_len; } STATIC bool next_line(FIL *active_file) { - uint8_t character = ' '; - UINT quantity_read = 1; + uint8_t character; bool quoted = false; bool escaped = false; // Track comments because they last until the end of the line. bool comment = false; - FRESULT result = FR_OK; // Consume all characters while quoted or others up to \n. - while (result == FR_OK && quantity_read > 0 && (quoted || character != '\n')) { - if (character == '#' || comment) { + do { + character = get_next_character(active_file); + + if ((!quoted || character == '#') || comment) { // Comments consume any escaping. comment = true; } else if (!escaped) { @@ -112,33 +130,32 @@ STATIC bool next_line(FIL *active_file) { } else { escaped = false; } - result = f_read(active_file, &character, 1, &quantity_read); - } - return result == FR_OK && quantity_read > 0; + } while (character != 0 && (quoted || character != '\n')); + + return character != 0; } STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) { - uint8_t character = ' '; - UINT quantity_read = 1; - // Consume spaces before = - character = consume_spaces(active_file); + uint8_t character; + // Consume spaces before "=", and get first character of interest. + character = consume_whitespace(active_file); if (character != '=') { if (character == '#' || character == '\n') { // Keys without an = after them are valid with the value None. - return 0; + return -1; } // All other characters are invalid. return -1; } - character = ' '; // Consume space after = - while (unichar_isspace(character) && quantity_read > 0) { - f_read(active_file, &character, 1, &quantity_read); + if (character != '#') { + // a # immediately after = is part of the value! + character = consume_whitespace(active_file); } bool quoted = false; if (character == '\'') { quoted = true; - f_read(active_file, &character, 1, &quantity_read); + character = get_next_character(active_file); } if (character == '"') { // We don't support double quoted values. @@ -150,12 +167,13 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) { // Count trailing spaces so we can ignore them at the end of unquoted // values. size_t trailing_spaces = 0; - while (quantity_read > 0) { + bool first_char = true; + while (character != 0) { // Consume the first \ if the value is quoted. if (quoted && character == '\\' && !escaped) { escaped = true; - // Drop this slash by short circuiting the rest of the loop. - f_read(active_file, &character, 1, &quantity_read); + // Drop this backslash by short circuiting the rest of the loop. + character = get_next_character(active_file); continue; } if (quoted && !escaped && character == '\'') { @@ -163,7 +181,7 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) { break; } // Unquoted values are ended by a newline or comment. - if (!quoted && (character == '\n' || character == '#')) { + if (!quoted && (character == '\n' || (character == '#' && !first_char))) { if (character == '\n') { // Rewind one so the next_line can find the \n. f_lseek(active_file, f_tell(active_file) - 1); @@ -182,7 +200,8 @@ STATIC mp_int_t read_value(FIL *active_file, char *value, size_t value_len) { value[value_pos] = character; } value_pos++; - f_read(active_file, &character, 1, &quantity_read); + character = get_next_character(active_file); + first_char = false; } return value_pos - trailing_spaces; @@ -214,7 +233,7 @@ mp_obj_t common_hal_dotenv_get_key(const char *path, const char *key) { // the length. char value[64]; mp_int_t actual_len = dotenv_get_key(path, key, value, sizeof(value)); - if (actual_len <= 0) { + if (actual_len < 0) { return mp_const_none; } if ((size_t)actual_len >= sizeof(value)) { diff --git a/tests/circuitpython-manual/dotenv_test.env b/tests/circuitpython-manual/dotenv_test.env new file mode 100644 index 0000000000..5e4e4cb8f0 --- /dev/null +++ b/tests/circuitpython-manual/dotenv_test.env @@ -0,0 +1,32 @@ +# No e0 value + # comment preceded by spaces +e1=e1value +e2=e2value # value followed by a comment +e3='e3value' +e4='e4value' # quoted value followed by a comment +# e5 should be None +e5 +# e6 should be the empty string +e6= +# e7 should be '#' (bash-like syntax processing) +e7=# +# e8 should be the empty string +e8='' +# e9 should be the empty string +e9= # +e10=e10_first +e10=e10_last +e11='abc#def' +# e12 should be 'abc#def' +e12=abc#def +e12='multi +line' +e13=e13value +e14 #comment +e15 = e15value +# e16 should be '#' +e16=# # +# e17 should be 'def#hi' +e17='def'#hi +# e18 should be '#has a hash' +e18=#has a hash diff --git a/tests/circuitpython-manual/dotenv_test.py b/tests/circuitpython-manual/dotenv_test.py new file mode 100644 index 0000000000..32127b54cb --- /dev/null +++ b/tests/circuitpython-manual/dotenv_test.py @@ -0,0 +1,23 @@ +import dotenv + +FILE = "dotenv_test.env" + +print("e0", dotenv.get_key(FILE, "e0")) +print("e1", dotenv.get_key(FILE, "e1")) +print("e2", dotenv.get_key(FILE, "e2")) +print("e3", dotenv.get_key(FILE, "e3")) +print("e4", dotenv.get_key(FILE, "e4")) +print("e5", dotenv.get_key(FILE, "e5")) +print("e6", dotenv.get_key(FILE, "e6")) +print("e7", dotenv.get_key(FILE, "e7")) +print("e8", dotenv.get_key(FILE, "e8")) +print("e9", dotenv.get_key(FILE, "e9")) +print("e10", dotenv.get_key(FILE, "e10")) +print("e11", dotenv.get_key(FILE, "e11")) +print("e12", dotenv.get_key(FILE, "e12")) +print("e13", dotenv.get_key(FILE, "e13")) +print("e14", dotenv.get_key(FILE, "e14")) +print("e15", dotenv.get_key(FILE, "e15")) +print("e16", dotenv.get_key(FILE, "e16")) +print("e17", dotenv.get_key(FILE, "e17")) +print("e18", dotenv.get_key(FILE, "e18"))