Fix repl support for unicode

Currently when a utf8 character that is bigger than 1 byte is typed in
the repl, it isn't handled how it should be. If you try to move the
cursor in any direction the text gets messed up. This fixes that.
This commit is contained in:
George Waters 2020-07-22 14:06:04 -04:00
parent fee3c0afbd
commit 1d410bb68b
No known key found for this signature in database
GPG Key ID: D993F8B1CC21DB25

View File

@ -100,6 +100,7 @@ STATIC readline_t rl;
int readline_process_char(int c) {
size_t last_line_len = rl.line->len;
int cont_chars = 0;
int redraw_step_back = 0;
bool redraw_from_cursor = false;
int redraw_step_forward = 0;
@ -178,6 +179,13 @@ int readline_process_char(int c) {
int nspace = 1;
#endif
// Check if we have moved into a UTF-8 continuation byte
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-nspace]) &&
rl.cursor_pos-nspace > rl.orig_line_len) {
nspace++;
cont_chars++;
}
// do the backspace
vstr_cut_out_bytes(rl.line, rl.cursor_pos - nspace, nspace);
// set redraw parameters
@ -206,12 +214,37 @@ int readline_process_char(int c) {
redraw_step_forward = compl_len;
}
#endif
} else if (32 <= c ) {
} else if (32 <= c && c <= 126) {
// printable character
vstr_ins_char(rl.line, rl.cursor_pos, c);
// set redraw parameters
redraw_from_cursor = true;
redraw_step_forward = 1;
}else if (c >= 128) {
// utf-8 character
if (c >= 0xc0 && c < 0xf8) {
// First Code Point
vstr_ins_char(rl.line, rl.cursor_pos, c);
}else if (UTF8_IS_CONT(c)) {
char fcp = rl.line->buf[rl.cursor_pos];
if (fcp >= 0xc0 && fcp < 0xf8) {
int need = (0xe5 >> ((fcp >> 3) & 0x6)) & 3; // From unicode.c L195
cont_chars++;
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+cont_chars]) &&
rl.cursor_pos+cont_chars < rl.line->len && cont_chars < need) {
cont_chars++;
}
vstr_ins_char(rl.line, rl.cursor_pos+cont_chars, c);
if (cont_chars == need) {
redraw_from_cursor = true;
redraw_step_forward = cont_chars+1;
}
}else{
//ignore, for now (invalid first code point)
}
}else {
// ignore, invalid
}
}
} else if (rl.escape_seq == ESEQ_ESC) {
switch (c) {
@ -237,6 +270,12 @@ up_arrow_key:
#endif
// up arrow
if (rl.hist_cur + 1 < (int)READLINE_HIST_SIZE && MP_STATE_PORT(readline_hist)[rl.hist_cur + 1] != NULL) {
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// increase hist num
rl.hist_cur += 1;
// set line to history
@ -253,6 +292,12 @@ down_arrow_key:
#endif
// down arrow
if (rl.hist_cur >= 0) {
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// decrease hist num
rl.hist_cur -= 1;
// set line to history
@ -272,6 +317,12 @@ right_arrow_key:
// right arrow
if (rl.cursor_pos < rl.line->len) {
redraw_step_forward = 1;
// Check if we have moved into a UTF-8 continuation byte
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos+redraw_step_forward]) &&
rl.cursor_pos+redraw_step_forward < rl.line->len) {
redraw_step_forward++;
cont_chars++;
}
}
} else if (c == 'D') {
#if MICROPY_REPL_EMACS_KEYS
@ -280,6 +331,12 @@ left_arrow_key:
// left arrow
if (rl.cursor_pos > rl.orig_line_len) {
redraw_step_back = 1;
// Check if we have moved into a UTF-8 continuation byte
while (UTF8_IS_CONT(rl.line->buf[rl.cursor_pos-redraw_step_back]) &&
rl.cursor_pos-redraw_step_back > rl.orig_line_len) {
redraw_step_back++;
cont_chars++;
}
}
} else if (c == 'H') {
// home
@ -295,9 +352,21 @@ left_arrow_key:
if (c == '~') {
if (rl.escape_seq_buf[0] == '1' || rl.escape_seq_buf[0] == '7') {
home_key:
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
redraw_step_back = rl.cursor_pos - rl.orig_line_len;
} else if (rl.escape_seq_buf[0] == '4' || rl.escape_seq_buf[0] == '8') {
end_key:
for (char *ch = rl.line->buf+rl.cursor_pos-1; ch > rl.line->buf+rl.orig_line_len; ch--) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
redraw_step_forward = rl.line->len - rl.cursor_pos;
} else if (rl.escape_seq_buf[0] == '3') {
// delete
@ -331,7 +400,7 @@ delete_key:
// redraw command prompt, efficiently
if (redraw_step_back > 0) {
mp_hal_move_cursor_back(redraw_step_back);
mp_hal_move_cursor_back(redraw_step_back-cont_chars);
rl.cursor_pos -= redraw_step_back;
}
if (redraw_from_cursor) {
@ -339,10 +408,24 @@ delete_key:
// erase old chars
mp_hal_erase_line_from_cursor(last_line_len - rl.cursor_pos);
}
// Check if we have moved into a UTF-8 continuation byte
// while (rl.cursor_pos+redraw_step_forward < rl.line->len &&
// UTF8_IS_CONT(rl.line->buf[rl.cursor_pos]) && rl.cursor_pos > 0) {
// rl.cursor_pos--;
// redraw_step_forward++;
// }
cont_chars = 0;
for (char *ch = rl.line->buf+rl.cursor_pos+redraw_step_forward; ch < rl.line->buf+rl.line->len; ch++) {
// printf("char: %d\n", ch);
if (UTF8_IS_CONT(*ch)) {
cont_chars++;
}
}
// draw new chars
mp_hal_stdout_tx_strn(rl.line->buf + rl.cursor_pos, rl.line->len - rl.cursor_pos);
// move cursor forward if needed (already moved forward by length of line, so move it back)
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward));
mp_hal_move_cursor_back(rl.line->len - (rl.cursor_pos + redraw_step_forward) - cont_chars);
rl.cursor_pos += redraw_step_forward;
} else if (redraw_step_forward > 0) {
// draw over old chars to move cursor forwards