Refactor utf-8 code, reduce impact on code size

This commit is contained in:
George Waters 2020-08-02 21:26:09 -04:00
parent 1d410bb68b
commit 71ce480dbb
No known key found for this signature in database
GPG Key ID: D993F8B1CC21DB25

View File

@ -92,12 +92,21 @@ typedef struct _readline_t {
int escape_seq; int escape_seq;
int hist_cur; int hist_cur;
size_t cursor_pos; size_t cursor_pos;
uint8_t utf8_cont_chars;
char escape_seq_buf[1]; char escape_seq_buf[1];
const char *prompt; const char *prompt;
} readline_t; } readline_t;
STATIC readline_t rl; STATIC readline_t rl;
// Count the bytes in the half-open range [start, end) that UTF8_IS_CONT
// classifies as UTF-8 continuation bytes.
//
// start: first byte to examine.
// end:   one past the last byte to examine; never dereferenced.
//
// Returns the sum of UTF8_IS_CONT() over the range, or 0 when start is not
// below end.
int readline_count_cont_byte(char *start, char *end) {
    int n_cont = 0;
    char *cur = start;
    while (cur < end) {
        // Accumulate the macro result directly rather than branching on it.
        n_cont += UTF8_IS_CONT(*cur);
        cur++;
    }
    return n_cont;
}
int readline_process_char(int c) { int readline_process_char(int c) {
size_t last_line_len = rl.line->len; size_t last_line_len = rl.line->len;
int cont_chars = 0; int cont_chars = 0;
@ -180,8 +189,7 @@ int readline_process_char(int c) {
#endif #endif
// Check if we have moved into a UTF-8 continuation byte // Check if we have moved into a UTF-8 continuation byte
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace]) && while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace])) {
rl.cursor_pos-nspace > rl.orig_line_len) {
nspace++; nspace++;
cont_chars++; cont_chars++;
} }
@ -223,27 +231,23 @@ int readline_process_char(int c) {
}else if (c >= 128) { }else if (c >= 128) {
// utf-8 character // utf-8 character
if (c >= 0xc0 && c < 0xf8) { if (c >= 0xc0 && c < 0xf8) {
// First Code Point // Lead code point
vstr_ins_char(rl.line, rl.cursor_pos, c); vstr_ins_char(rl.line, rl.cursor_pos, c);
rl.utf8_cont_chars = 0;
}else if (UTF8_IS_CONT(c)) { }else if (UTF8_IS_CONT(c)) {
char fcp = rl.line->buf[rl.cursor_pos]; char lcp = rl.line->buf[rl.cursor_pos];
if (fcp >= 0xc0 && fcp < 0xf8) { // Check for valid lead code point
int need = (0xe5 >> ((fcp >> 3) & 0x6)) & 3; // From unicode.c L195 if (lcp >= 0xc0 && lcp < 0xf8) {
cont_chars++; rl.utf8_cont_chars += 1;
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+cont_chars]) && vstr_ins_char(rl.line, rl.cursor_pos+rl.utf8_cont_chars, c);
rl.cursor_pos+cont_chars < rl.line->len && cont_chars < need) { // set redraw parameters if we have the entire character
cont_chars++; uint8_t need = (0xe5 >> ((lcp >> 3) & 0x6)) & 3; // From unicode.c L195
} if (rl.utf8_cont_chars == need) {
vstr_ins_char(rl.line, rl.cursor_pos+cont_chars, c);
if (cont_chars == need) {
redraw_from_cursor = true; redraw_from_cursor = true;
redraw_step_forward = cont_chars+1; redraw_step_forward = rl.utf8_cont_chars+1;
cont_chars = rl.utf8_cont_chars;
} }
}else{
//ignore, for now (invalid first code point)
} }
}else {
// ignore, invalid
} }
} }
} else if (rl.escape_seq == ESEQ_ESC) { } else if (rl.escape_seq == ESEQ_ESC) {
@ -270,12 +274,8 @@ up_arrow_key:
#endif #endif
// up arrow // up arrow
if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) { if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) {
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) { // Check for continuation characters through the cursor_pos
// printf("char: %d\n", ch); cont_chars = readline_count_cont_byte(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// increase hist num // increase hist num
rl.hist_cur += 1; rl.hist_cur += 1;
// set line to history // set line to history
@ -292,12 +292,8 @@ down_arrow_key:
#endif #endif
// down arrow // down arrow
if (rl.hist_cur >= 0) { if (rl.hist_cur >= 0) {
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) { // Check for continuation characters through the cursor_pos
// printf("char: %d\n", ch); cont_chars = readline_count_cont_byte(rl.line->buf+rl.orig_line_len, rl.line->buf+rl.cursor_pos);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// decrease hist num // decrease hist num
rl.hist_cur -= 1; rl.hist_cur -= 1;
// set line to history // set line to history
@ -321,7 +317,6 @@ right_arrow_key:
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) && while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) &&
rl.cursor_pos+redraw_step_forward < rl.line->len) { rl.cursor_pos+redraw_step_forward < rl.line->len) {
redraw_step_forward++; redraw_step_forward++;
cont_chars++;
} }
} }
} else if (c == 'D') { } else if (c == 'D') {
@ -332,8 +327,7 @@ left_arrow_key:
if (rl.cursor_pos > rl.orig_line_len) { if (rl.cursor_pos > rl.orig_line_len) {
redraw_step_back = 1; redraw_step_back = 1;
// Check if we have moved into a UTF-8 continuation byte // Check if we have moved into a UTF-8 continuation byte
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back]) && while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back])) {
rl.cursor_pos-redraw_step_back > rl.orig_line_len) {
redraw_step_back++; redraw_step_back++;
cont_chars++; cont_chars++;
} }
@ -352,21 +346,9 @@ left_arrow_key:
if (c == '~') { if (c == '~') {
if (rl.escape_seq_buf[0] == '1' || rl.escape_seq_buf[0] == '7') { if (rl.escape_seq_buf[0] == '1' || rl.escape_seq_buf[0] == '7') {
home_key: home_key:
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
redraw_step_back = rl.cursor_pos - rl.orig_line_len; redraw_step_back = rl.cursor_pos - rl.orig_line_len;
} else if (rl.escape_seq_buf[0] == '4' || rl.escape_seq_buf[0] == '8') { } else if (rl.escape_seq_buf[0] == '4' || rl.escape_seq_buf[0] == '8') {
end_key: end_key:
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
redraw_step_forward = rl.line->len - rl.cursor_pos; redraw_step_forward = rl.line->len - rl.cursor_pos;
} else if (rl.escape_seq_buf[0] == '3') { } else if (rl.escape_seq_buf[0] == '3') {
// delete // delete
@ -408,20 +390,8 @@ delete_key:
// erase old chars // erase old chars
mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos); mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos);
} }
// Check if we have moved into a UTF-8 continuation byte // Check for continuation characters from the new cursor_pos to the EOL
// while (rl.cursor_pos+redraw_step_forward < rl.line->len && cont_chars = readline_count_cont_byte(rl.line->buf+rl.cursor_pos+redraw_step_forward, rl.line->buf+rl.line->len);
// UTF8_IS_CONT(rl.line->buf[rl.cursor_pos]) && rl.cursor_pos > 0) {
// rl.cursor_pos--;
// redraw_step_forward++;
// }
cont_chars = 0;
for (char *ch = rl.line->buf+rl.cursor_pos+redraw_step_forward; ch < rl.line->buf+rl.line->len; ch++) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// draw new chars // draw new chars
mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos); mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos);
// move cursor forward if needed (already moved forward by length of line, so move it back) // move cursor forward if needed (already moved forward by length of line, so move it back)