mirror of
https://github.com/sheumann/hush.git
synced 2025-01-18 07:31:34 +00:00
ls: fix handling of broken unicode sequences
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
d8528b8e56
commit
3d5b606931
@ -139,6 +139,8 @@ size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
|
|||||||
return org_n - n;
|
return org_n - n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ERROR_WCHAR (~(wchar_t)0)
|
||||||
|
|
||||||
static const char *mbstowc_internal(wchar_t *res, const char *src)
|
static const char *mbstowc_internal(wchar_t *res, const char *src)
|
||||||
{
|
{
|
||||||
int bytes;
|
int bytes;
|
||||||
@ -159,16 +161,22 @@ static const char *mbstowc_internal(wchar_t *res, const char *src)
|
|||||||
c <<= 1;
|
c <<= 1;
|
||||||
bytes++;
|
bytes++;
|
||||||
} while ((c & 0x80) && bytes < 6);
|
} while ((c & 0x80) && bytes < 6);
|
||||||
if (bytes == 1)
|
if (bytes == 1) {
|
||||||
return NULL;
|
/* A bare "continuation" byte. Say, 80 */
|
||||||
|
*res = ERROR_WCHAR;
|
||||||
|
return src;
|
||||||
|
}
|
||||||
c = (uint8_t)(c) >> bytes;
|
c = (uint8_t)(c) >> bytes;
|
||||||
|
|
||||||
while (--bytes) {
|
while (--bytes) {
|
||||||
unsigned ch = (unsigned char) *src++;
|
unsigned ch = (unsigned char) *src;
|
||||||
if ((ch & 0xc0) != 0x80) {
|
if ((ch & 0xc0) != 0x80) {
|
||||||
return NULL;
|
/* Missing "continuation" byte. Example: e0 80 */
|
||||||
|
*res = ERROR_WCHAR;
|
||||||
|
return src;
|
||||||
}
|
}
|
||||||
c = (c << 6) + (ch & 0x3f);
|
c = (c << 6) + (ch & 0x3f);
|
||||||
|
src++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO */
|
/* TODO */
|
||||||
@ -177,8 +185,8 @@ static const char *mbstowc_internal(wchar_t *res, const char *src)
|
|||||||
/* 11110000 10000000 10000100 10000000 converts to 0x100 */
|
/* 11110000 10000000 10000100 10000000 converts to 0x100 */
|
||||||
/* correct encoding: 11000100 10000000 */
|
/* correct encoding: 11000100 10000000 */
|
||||||
if (c <= 0x7f) { /* crude check */
|
if (c <= 0x7f) { /* crude check */
|
||||||
return NULL;
|
*res = ERROR_WCHAR;
|
||||||
//or maybe 0xfffd; /* replacement character */
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
*res = c;
|
*res = c;
|
||||||
@ -204,7 +212,7 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
|
|||||||
while (n) {
|
while (n) {
|
||||||
wchar_t wc;
|
wchar_t wc;
|
||||||
src = mbstowc_internal(&wc, src);
|
src = mbstowc_internal(&wc, src);
|
||||||
if (src == NULL) /* error */
|
if (wc == ERROR_WCHAR) /* error */
|
||||||
return (size_t) -1L;
|
return (size_t) -1L;
|
||||||
if (dest)
|
if (dest)
|
||||||
*dest++ = wc;
|
*dest++ = wc;
|
||||||
@ -312,20 +320,15 @@ static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char
|
|||||||
goto subst;
|
goto subst;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
{
|
src = mbstowc_internal(&wc, src);
|
||||||
const char *src1 = mbstowc_internal(&wc, src);
|
/* src is advanced to next mb char
|
||||||
/* src = NULL: invalid sequence is seen,
|
* wc == ERROR_WCHAR: invalid sequence is seen
|
||||||
* else: wc is set, src is advanced to next mb char
|
* else: wc is set
|
||||||
*/
|
*/
|
||||||
if (src1) { /* no error */
|
if (wc == ERROR_WCHAR) /* error */
|
||||||
|
goto subst;
|
||||||
if (wc == 0) /* end-of-string */
|
if (wc == 0) /* end-of-string */
|
||||||
break;
|
break;
|
||||||
src = src1;
|
|
||||||
} else { /* error */
|
|
||||||
src++;
|
|
||||||
goto subst;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
|
if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
|
||||||
goto subst;
|
goto subst;
|
||||||
@ -411,7 +414,7 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
src = mbstowc_internal(&wc, src);
|
src = mbstowc_internal(&wc, src);
|
||||||
if (!src || wc == 0) /* error, or end-of-string */
|
if (wc == ERROR_WCHAR || wc == 0) /* error, or end-of-string */
|
||||||
return width;
|
return width;
|
||||||
#endif
|
#endif
|
||||||
w = wcwidth(wc);
|
w = wcwidth(wc);
|
||||||
|
@ -11,9 +11,11 @@ mkdir ls.testdir || exit 1
|
|||||||
|
|
||||||
# testing "test name" "command" "expected result" "file input" "stdin"
|
# testing "test name" "command" "expected result" "file input" "stdin"
|
||||||
|
|
||||||
# The test isn't passing correctly now - all | chars should line up
|
# With Unicode provided by libc locale, I'm not sure this test can pass.
|
||||||
# perfectly in the correctly passed test.
|
# I suspect we might fail to skip exactly correct number of bytes
|
||||||
|
# over broked unicode sequences.
|
||||||
test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
||||||
|
&& test x"$CONFIG_LOCALE_SUPPORT" != x"y" \
|
||||||
&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
|
&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
|
||||||
&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
|
&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
|
||||||
&& testing "ls unicode test" \
|
&& testing "ls unicode test" \
|
||||||
@ -73,40 +75,40 @@ test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
|
|||||||
0053____"?_?_"____________________________________________________________|
|
0053____"?_?_"____________________________________________________________|
|
||||||
0054_3.3__Sequences_with_last_continuation_byte_missing___________________|
|
0054_3.3__Sequences_with_last_continuation_byte_missing___________________|
|
||||||
0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
||||||
0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______|
|
0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
||||||
0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______|
|
0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
||||||
0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______|
|
0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
||||||
0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______|
|
0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
|
||||||
0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
|
0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
|
||||||
0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______|
|
0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"?"______|
|
||||||
0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______|
|
0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"?"______|
|
||||||
0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______|
|
0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"?"______|
|
||||||
0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______|
|
0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?"______|
|
||||||
0065_3.4__Concatenation_of_incomplete_sequences___________________________|
|
0065_3.4__Concatenation_of_incomplete_sequences___________________________|
|
||||||
0066____"??????????????????????????????"______________________________________________________|
|
0066____"??????????"______________________________________________________|
|
||||||
0067_3.5__Impossible_bytes________________________________________________|
|
0067_3.5__Impossible_bytes________________________________________________|
|
||||||
0068_3.5.1__fe_=_"?"______________________________________________________|
|
0068_3.5.1__fe_=_"?"______________________________________________________|
|
||||||
0069_3.5.2__ff_=_"?"______________________________________________________|
|
0069_3.5.2__ff_=_"?"______________________________________________________|
|
||||||
0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
|
0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
|
||||||
0071_4__Overlong_sequences________________________________________________|
|
0071_4__Overlong_sequences________________________________________________|
|
||||||
0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
|
0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
|
||||||
0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________|
|
0073_4.1.1_U+002F_=_c0_af_____________=_"?"_______________________________|
|
||||||
0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________|
|
0074_4.1.2_U+002F_=_e0_80_af__________=_"?"_______________________________|
|
||||||
0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________|
|
0075_4.1.3_U+002F_=_f0_80_80_af_______=_"?"_______________________________|
|
||||||
0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________|
|
0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?"_______________________________|
|
||||||
0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________|
|
0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"?"_______________________________|
|
||||||
0078_4.2__Maximum_overlong_sequences______________________________________|
|
0078_4.2__Maximum_overlong_sequences______________________________________|
|
||||||
0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________|
|
0079_4.2.1__U-0000007F_=_c1_bf_____________=_"?"__________________________|
|
||||||
0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
|
0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
|
||||||
0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
|
0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
|
||||||
0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
|
0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
|
||||||
0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
|
0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
|
||||||
0084_4.3__Overlong_representation_of_the_NUL_character____________________|
|
0084_4.3__Overlong_representation_of_the_NUL_character____________________|
|
||||||
0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________|
|
0085_4.3.1__U+0000_=_c0_80_____________=_"?"______________________________|
|
||||||
0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________|
|
0086_4.3.2__U+0000_=_e0_80_80__________=_"?"______________________________|
|
||||||
0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________|
|
0087_4.3.3__U+0000_=_f0_80_80_80_______=_"?"______________________________|
|
||||||
0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________|
|
0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?"______________________________|
|
||||||
0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________|
|
0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"?"______________________________|
|
||||||
0090_5__Illegal_code_positions____________________________________________|
|
0090_5__Illegal_code_positions____________________________________________|
|
||||||
0091_5.1_Single_UTF-16_surrogates_________________________________________|
|
0091_5.1_Single_UTF-16_surrogates_________________________________________|
|
||||||
0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
|
0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user