lineedit: fix another corner case with bad unicode input

function                                             old     new   delta
read_key                                             607     646     +39
readit                                                50      55      +5
getch_nowait                                         290     295      +5
hash_find                                            233     234      +1
xstrtoul_range_sfx                                   231     230      -1
passwd_main                                         1058    1056      -2
builtin_exit                                          45      43      -2
cmp_main                                             649     645      -4
lineedit_read_key                                    257     245     -12
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/5 up/down: 50/-21)             Total: 29 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2010-03-11 21:17:55 +01:00
parent b0a57abb79
commit 58f108eb33
6 changed files with 67 additions and 35 deletions

View File

@ -2205,7 +2205,7 @@ static int readit(void) // read (maybe cursor) key from stdin
int c; int c;
fflush_all(); fflush_all();
c = read_key(STDIN_FILENO, readbuffer); c = read_key(STDIN_FILENO, readbuffer, /*timeout off:*/ -2);
if (c == -1) { // EOF/error if (c == -1) { // EOF/error
go_bottom_and_clear_to_eol(); go_bottom_and_clear_to_eol();
cookmode(); // terminal to "cooked" cookmode(); // terminal to "cooked"

View File

@ -1275,8 +1275,12 @@ enum {
* Return of -1 means EOF or error (errno == 0 on EOF). * Return of -1 means EOF or error (errno == 0 on EOF).
* buffer[0] is used as a counter of buffered chars and must be 0 * buffer[0] is used as a counter of buffered chars and must be 0
* on first call. * on first call.
* timeout:
* -2: do not poll for input before reading;
* -1: poll(-1), i.e. block until input arrives (useful if fd may be nonblocking);
* >=0: poll for up to TIMEOUT milliseconds; on timeout return -1 with errno set to EAGAIN
*/ */
int64_t read_key(int fd, char *buffer) FAST_FUNC; int64_t read_key(int fd, char *buffer, int timeout) FAST_FUNC;
void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC; void read_key_ungets(char *buffer, const char *str, unsigned len) FAST_FUNC;

View File

@ -1658,27 +1658,28 @@ static void win_changed(int nsig)
static int lineedit_read_key(char *read_key_buffer) static int lineedit_read_key(char *read_key_buffer)
{ {
int64_t ic; int64_t ic;
struct pollfd pfd; int timeout = -1;
int delay = -1;
#if ENABLE_FEATURE_ASSUME_UNICODE #if ENABLE_FEATURE_ASSUME_UNICODE
char unicode_buf[MB_CUR_MAX + 1]; char unicode_buf[MB_CUR_MAX + 1];
int unicode_idx = 0; int unicode_idx = 0;
#endif #endif
pfd.fd = STDIN_FILENO; while (1) {
pfd.events = POLLIN; /* Wait for input. TIMEOUT = -1 makes read_key wait even
do { * on nonblocking stdin, TIMEOUT = 50 makes sure we won't
#if ENABLE_FEATURE_EDITING_ASK_TERMINAL || ENABLE_FEATURE_ASSUME_UNICODE * insist on full MB_CUR_MAX buffer to declare input like
poll_again: * "\xff\n",pause,"ls\n" invalid and thus won't lose "ls".
*
* Note: read_key sets errno to 0 on success.
*/
ic = read_key(STDIN_FILENO, read_key_buffer, timeout);
if (errno) {
#if ENABLE_FEATURE_ASSUME_UNICODE
if (errno == EAGAIN && unicode_idx != 0)
goto pushback;
#endif #endif
if (read_key_buffer[0] == 0) { break;
/* Wait for input. Can't just call read_key,
* it returns at once if stdin
* is in non-blocking mode. */
safe_poll(&pfd, 1, delay);
} }
/* Note: read_key sets errno to 0 on success: */
ic = read_key(STDIN_FILENO, read_key_buffer);
#if ENABLE_FEATURE_EDITING_ASK_TERMINAL #if ENABLE_FEATURE_EDITING_ASK_TERMINAL
if ((int32_t)ic == KEYCODE_CURSOR_POS if ((int32_t)ic == KEYCODE_CURSOR_POS
@ -1695,7 +1696,7 @@ static int lineedit_read_key(char *read_key_buffer)
} }
} }
} }
goto poll_again; continue;
} }
#endif #endif
@ -1704,19 +1705,20 @@ static int lineedit_read_key(char *read_key_buffer)
wchar_t wc; wchar_t wc;
if ((int32_t)ic < 0) /* KEYCODE_xxx */ if ((int32_t)ic < 0) /* KEYCODE_xxx */
return ic; break;
// TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff... // TODO: imagine sequence like: 0xff,<left-arrow>: we are currently losing 0xff...
unicode_buf[unicode_idx++] = ic; unicode_buf[unicode_idx++] = ic;
unicode_buf[unicode_idx] = '\0'; unicode_buf[unicode_idx] = '\0';
if (mbstowcs(&wc, unicode_buf, 1) != 1) { if (mbstowcs(&wc, unicode_buf, 1) != 1) {
/* Not (yet?) a valid unicode char */ /* Not (yet?) a valid unicode char */
if (unicode_idx < MB_CUR_MAX) { if (unicode_idx < MB_CUR_MAX) {
delay = 50; timeout = 50;
goto poll_again; continue;
} }
pushback:
/* Invalid sequence. Save all "bad bytes" except first */ /* Invalid sequence. Save all "bad bytes" except first */
read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1); read_key_ungets(read_key_buffer, unicode_buf + 1, unicode_idx - 1);
/* /*
* ic = unicode_buf[0] sounds even better, but currently * ic = unicode_buf[0] sounds even better, but currently
* this does not work: wchar_t[] -> char[] conversion * this does not work: wchar_t[] -> char[] conversion
@ -1730,7 +1732,8 @@ static int lineedit_read_key(char *read_key_buffer)
} }
} }
#endif #endif
} while (errno == EAGAIN); break;
}
return ic; return ic;
} }

View File

@ -9,7 +9,7 @@
*/ */
#include "libbb.h" #include "libbb.h"
int64_t FAST_FUNC read_key(int fd, char *buffer) int64_t FAST_FUNC read_key(int fd, char *buffer, int timeout)
{ {
struct pollfd pfd; struct pollfd pfd;
const char *seq; const char *seq;
@ -90,14 +90,27 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
/* ESC [ Z - Shift-Tab */ /* ESC [ Z - Shift-Tab */
}; };
pfd.fd = fd;
pfd.events = POLLIN;
buffer++; /* saved chars counter is in buffer[-1] now */ buffer++; /* saved chars counter is in buffer[-1] now */
start_over: start_over:
errno = 0; errno = 0;
n = (unsigned char)buffer[-1]; n = (unsigned char)buffer[-1];
if (n == 0) { if (n == 0) {
/* If no data, block waiting for input. /* If no data, wait for input.
* It is tempting to read more than one byte here, * If requested, wait TIMEOUT ms. TIMEOUT = -1 is useful
* if fd can be in non-blocking mode.
*/
if (timeout >= -1) {
if (safe_poll(&pfd, 1, timeout) == 0) {
/* Timed out */
errno = EAGAIN;
return -1;
}
}
/* It is tempting to read more than one byte here,
* but it breaks pasting. Example: at shell prompt, * but it breaks pasting. Example: at shell prompt,
* user presses "c","a","t" and then pastes "\nline\n". * user presses "c","a","t" and then pastes "\nline\n".
* When we were reading 3 bytes here, we were eating * When we were reading 3 bytes here, we were eating
@ -121,8 +134,6 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
} }
/* Loop through known ESC sequences */ /* Loop through known ESC sequences */
pfd.fd = fd;
pfd.events = POLLIN;
seq = esccmds; seq = esccmds;
while (*seq != '\0') { while (*seq != '\0') {
/* n - position in sequence we did not read yet */ /* n - position in sequence we did not read yet */

View File

@ -855,7 +855,7 @@ static int getch_nowait(void)
/* We have kbd_fd in O_NONBLOCK mode, read inside read_key() /* We have kbd_fd in O_NONBLOCK mode, read inside read_key()
* would not block even if there is no input available */ * would not block even if there is no input available */
rd = read_key(kbd_fd, kbd_input); rd = read_key(kbd_fd, kbd_input, /*timeout off:*/ -2);
if (rd == -1) { if (rd == -1) {
if (errno == EAGAIN) { if (errno == EAGAIN) {
/* No keyboard input available. Since poll() did return, /* No keyboard input available. Since poll() did return,

View File

@ -10,33 +10,47 @@
# testing "test name" "options" "expected result" "file input" "stdin" # testing "test name" "options" "expected result" "file input" "stdin"
testing "One byte which is not valid unicode char followed by valid input" \ testing "One byte which is not valid unicode char followed by valid input" \
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
"\ "\
00000000 3f 2d 0a |?-.| 00000000 3f 2d 0a |?-.|
00000003 00000003
" \ " \
"" \ "" \
"echo \xff- | hexdump -C >output; exit; exit; exit; exit\n" \ "echo \xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
testing "30 bytes which are not valid unicode chars followed by valid input" \ testing "30 bytes which are not valid unicode chars followed by valid input" \
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
"\ "\
00000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????| 00000000 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f |????????????????|
00000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.| 00000010 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 3f 2d 0a |??????????????-.|
00000020 00000020
" \ " \
"" \ "" \
"echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >output; exit; exit; exit; exit\n" \ "echo \xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff- | hexdump -C >ash.output; exit; exit; exit; exit\n"
# Not sure this behavior is perfect: we lose all invalid input which precedes # Not sure this behavior is perfect: we lose all invalid input which precedes
# arrow keys and such. In this example, \xff\xff are lost # arrow keys and such. In this example, \xff\xff are lost
testing "2 bytes which are not valid unicode chars followed by left arrow key" \ testing "2 bytes which are not valid unicode chars followed by left arrow key" \
"script -q -c 'ash' /dev/null >/dev/null; cat output; rm output" \ "script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
"\ "\
00000000 3d 2d 0a |=-.| 00000000 3d 2d 0a |=-.|
00000003 00000003
" \ " \
"" \ "" \
"echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >output; exit; exit; exit; exit\n" \ "echo =+\xff\xff\x1b\x5b\x44- | hexdump -C >ash.output; exit; exit; exit; exit\n"
# ash should see "echo \xff\n",pause -> execute it as "echo ?" (which is
# not checked by the test), then read and execute the rest: "echo A | ..."
# The bug was that ash was eating the beginning of "echo A" despite the pause.
testing "Invalid unicode chars followed by a pause do not eat next chars" \
"{ echo -ne 'echo \xff\n'; sleep 1; echo -ne 'echo A | hexdump -C >ash.output; exit; exit; exit; exit\n'; } \
| script -q -c 'ash' /dev/null >/dev/null; cat ash.output" \
"\
00000000 41 0a |A.|
00000002
" \
"" ""
rm ash.output
exit $FAILCOUNT exit $FAILCOUNT