aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2010-01-31 05:15:38 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2010-01-31 05:15:38 +0100
commit d8528b8e56bab7643722e4453121882d23c23c07 (patch)
tree c742df066326cd571327b10d4cca3341c798d129
parent ed910c750d7908a31262488e04d38b7bf3d75322 (diff)
download busybox-d8528b8e56bab7643722e4453121882d23c23c07.tar.gz
ls: unicode fixes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- TODO_unicode 2
-rw-r--r-- coreutils/ls.c 412
-rw-r--r-- include/libbb.h 19
-rw-r--r-- include/unicode.h 5
-rw-r--r-- libbb/Kbuild 1
-rw-r--r-- libbb/printable_string.c 65
-rw-r--r-- testsuite/ls.mk_uni_tests 111
-rwxr-xr-x testsuite/ls.tests 136
8 files changed, 545 insertions, 206 deletions
diff --git a/TODO_unicode b/TODO_unicode
index c29fd933b..b310e8d4d 100644
--- a/TODO_unicode
+++ b/TODO_unicode
@@ -7,7 +7,7 @@ dumpleases
Applets which may need unicode handling (more extensive than sanitizing
of filenames in error messages):
-ls - uses unicode_strlen, not scrlen
+ls - work in progress
expand, unexpand - uses unicode_strlen, not scrlen
ash, hush through lineedit - uses unicode_strlen, not scrlen
top - need to sanitize process args
diff --git a/coreutils/ls.c b/coreutils/ls.c
index 6c898b793..d004ce8b1 100644
--- a/coreutils/ls.c
+++ b/coreutils/ls.c
@@ -241,9 +241,6 @@ struct dnode {
IF_SELINUX(security_context_t sid;)
};
-static struct dnode **list_dir(const char *, unsigned *);
-static unsigned list_single(const struct dnode *);
-
struct globals {
#if ENABLE_FEATURE_LS_COLOR
smallint show_color;
@@ -528,31 +525,236 @@ static void dnsort(struct dnode **dn, int size)
#endif
-static void showfiles(struct dnode **dn, unsigned nfiles)
+static unsigned calc_name_len(const char *name)
+{
+ unsigned len;
+ uni_stat_t uni_stat;
+ /* Width-only counterpart of print_name(): computes the same column
+  * count for name, but writes nothing to stdout. */
+ // TODO: quote tab as \t, etc, if -Q
+ name = printable_string(&uni_stat, name); /* from here on, name is the sanitized string (or the input itself if nothing needed replacing) */
+
+ if (!(option_mask32 & OPT_Q)) {
+ return uni_stat.unicode_width;
+ }
+
+ len = 2 + uni_stat.unicode_width; /* 2 = opening and closing double quote */
+ while (*name) {
+ if (*name == '"' || *name == '\\') {
+ len++; /* one extra column for the escaping backslash */
+ }
+ name++;
+ }
+ return len;
+}
+
+
+/* Print name to stdout and return the number of used columns.
+ * The name is first passed through printable_string().
+ * With -Q (OPT_Q) it is wrapped in double quotes, and every double
+ * quote or backslash inside it is preceded by a backslash.
+ * Note that only STYLE_COLUMNS uses return value.
+ * STYLE_SINGLE and STYLE_LONG don't care.
+ * coreutils 7.2 also supports:
+ * ls -b (--escape) = octal escapes (although it does not appear to work)
+ * ls -N (--literal) = do not escape at all
+ */
+static unsigned print_name(const char *name)
+{
+ unsigned len;
+ uni_stat_t uni_stat;
+
+ // TODO: quote tab as \t, etc, if -Q
+ name = printable_string(&uni_stat, name); /* from here on, name is the sanitized string (or the input itself if nothing needed replacing) */
+
+ if (!(option_mask32 & OPT_Q)) {
+ fputs(name, stdout);
+ return uni_stat.unicode_width;
+ }
+
+ len = 2 + uni_stat.unicode_width; /* 2 = opening and closing double quote */
+ putchar('"');
+ while (*name) {
+ if (*name == '"' || *name == '\\') {
+ putchar('\\');
+ len++; /* the escaping backslash takes one more column */
+ }
+ putchar(*name++);
+ }
+ putchar('"');
+ return len;
+}
+
+/* Print one entry: each field selected by the all_fmt bits, in fixed
+ * order, then the name, the symlink target and the file type suffix.
+ * Return the number of used columns.
+ * Note that only STYLE_COLUMNS uses return value,
+ * STYLE_SINGLE and STYLE_LONG don't care.
+ */
+static NOINLINE unsigned list_single(const struct dnode *dn)
{
- unsigned i, ncols, nrows, row, nc;
unsigned column = 0;
- unsigned nexttab = 0;
- unsigned column_width = 0; /* for STYLE_LONG and STYLE_SINGLE not used */
+ char *lpath = lpath; /* for compiler: self-init; lpath is read only under the same LIST_SYMLINK + S_ISLNK test that assigns it */
+#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
+ struct stat info;
+ char append;
+#endif
/* Never happens:
- if (dn == NULL || nfiles < 1)
- return;
+ if (dn->fullname == NULL)
+ return 0;
*/
- if (all_fmt & STYLE_LONG) {
+#if ENABLE_FEATURE_LS_FILETYPES
+ append = append_char(dn->dstat.st_mode);
+#endif
+
+ /* Do readlink early, so that if it fails, error message
+ * does not appear *inside* the "ls -l" line */
+ if (all_fmt & LIST_SYMLINK)
+ if (S_ISLNK(dn->dstat.st_mode))
+ lpath = xmalloc_readlink_or_warn(dn->fullname);
+
+ if (all_fmt & LIST_INO)
+ column += printf("%7llu ", (long long) dn->dstat.st_ino); /* NOTE(review): unsigned conversion with a signed (long long) cast - confirm intended */
+ if (all_fmt & LIST_BLOCKS)
+ column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1)); /* st_blocks halved; presumably 512-byte units shown as 1K blocks - confirm */
+ if (all_fmt & LIST_MODEBITS)
+ column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
+ if (all_fmt & LIST_NLINKS)
+ column += printf("%4lu ", (long) dn->dstat.st_nlink);
+#if ENABLE_FEATURE_LS_USERNAME
+ if (all_fmt & LIST_ID_NAME) {
+ if (option_mask32 & OPT_g) {
+ column += printf("%-8.8s ",
+ get_cached_username(dn->dstat.st_uid));
+ } else {
+ column += printf("%-8.8s %-8.8s ",
+ get_cached_username(dn->dstat.st_uid),
+ get_cached_groupname(dn->dstat.st_gid));
+ }
+ }
+#endif
+ if (all_fmt & LIST_ID_NUMERIC) {
+ if (option_mask32 & OPT_g)
+ column += printf("%-8u ", (int) dn->dstat.st_uid);
+ else
+ column += printf("%-8u %-8u ",
+ (int) dn->dstat.st_uid,
+ (int) dn->dstat.st_gid);
+ }
+ if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
+ if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
+ column += printf("%4u, %3u ", /* device nodes: major, minor instead of a size */
+ (int) major(dn->dstat.st_rdev),
+ (int) minor(dn->dstat.st_rdev));
+ } else {
+ if (all_fmt & LS_DISP_HR) {
+ column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
+ /* print st_size, show one fractional, use suffixes */
+ make_human_readable_str(dn->dstat.st_size, 1, 0)
+ );
+ } else {
+ column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
+ }
+ }
+ }
+#if ENABLE_FEATURE_LS_TIMESTAMPS
+ if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
+ char *filetime;
+ time_t ttime = dn->dstat.st_mtime;
+ if (all_fmt & TIME_ACCESS)
+ ttime = dn->dstat.st_atime;
+ if (all_fmt & TIME_CHANGE)
+ ttime = dn->dstat.st_ctime; /* tested last, so it overrides TIME_ACCESS if both bits are set */
+ filetime = ctime(&ttime);
+ /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
+ if (all_fmt & LIST_FULLTIME)
+ column += printf("%.24s ", filetime);
+ else { /* LIST_DATE_TIME */
+ /* current_time_t ~== time(NULL) */
+ time_t age = current_time_t - ttime;
+ printf("%.6s ", filetime + 4); /* "Jun 30" */
+ if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
+ /* hh:mm if less than 6 months old */
+ printf("%.5s ", filetime + 11);
+ } else { /* year. buggy if year > 9999 ;) */
+ printf(" %.4s ", filetime + 20);
+ }
+ column += 13; /* 7 chars for month and day with space, plus 6 for either hh:mm or year with padding */
+ }
+ }
+#endif
+#if ENABLE_SELINUX
+ if (all_fmt & LIST_CONTEXT) {
+ column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
+ freecon(dn->sid); /* NOTE(review): sid is released here and the field is not reset */
+ }
+#endif
+ if (all_fmt & LIST_FILENAME) {
+#if ENABLE_FEATURE_LS_COLOR
+ if (show_color) {
+ info.st_mode = 0; /* for fgcolor() */
+ lstat(dn->fullname, &info); /* result ignored; st_mode was zeroed above in case the call fails */
+ printf("\033[%u;%um", bold(info.st_mode),
+ fgcolor(info.st_mode));
+ }
+#endif
+ column += print_name(dn->name);
+ if (show_color) {
+ printf("\033[0m"); /* escape sequences are not added to column */
+ }
+ }
+ if (all_fmt & LIST_SYMLINK) {
+ if (S_ISLNK(dn->dstat.st_mode) && lpath) { /* lpath is NULL-checked: presumably readlink failure yields NULL - confirm */
+ printf(" -> ");
+#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
+#if ENABLE_FEATURE_LS_COLOR
+ info.st_mode = 0; /* for fgcolor() */
+#endif
+ if (stat(dn->fullname, &info) == 0) {
+ append = append_char(info.st_mode); /* suffix char is recomputed from the stat() result */
+ }
+#endif
+#if ENABLE_FEATURE_LS_COLOR
+ if (show_color) {
+ printf("\033[%u;%um", bold(info.st_mode),
+ fgcolor(info.st_mode));
+ }
+#endif
+ column += print_name(lpath) + 4; /* 4 = width of the arrow separator printed above */
+ if (show_color) {
+ printf("\033[0m");
+ }
+ free(lpath);
+ }
+ }
+#if ENABLE_FEATURE_LS_FILETYPES
+ if (all_fmt & LIST_FILETYPE) {
+ if (append) {
+ putchar(append);
+ column++;
+ }
+ }
+#endif
+
+ return column;
+}
+
+static void showfiles(struct dnode **dn, unsigned nfiles)
+{
+ unsigned i, ncols, nrows, row, nc;
+ unsigned column = 0;
+ unsigned nexttab = 0;
+ unsigned column_width = 0; /* used only by STYLE_COLUMNS */
+
+ if (all_fmt & STYLE_LONG) { /* STYLE_LONG or STYLE_SINGLE */
ncols = 1;
} else {
/* find the longest file name, use that as the column width */
for (i = 0; dn[i]; i++) {
- int len = unicode_strlen(dn[i]->name);
+ int len = calc_name_len(dn[i]->name);
if (column_width < len)
column_width = len;
}
column_width += tabstops +
IF_SELINUX( ((all_fmt & LIST_CONTEXT) ? 33 : 0) + )
- ((all_fmt & LIST_INO) ? 8 : 0) +
- ((all_fmt & LIST_BLOCKS) ? 5 : 0);
+ ((all_fmt & LIST_INO) ? 8 : 0) +
+ ((all_fmt & LIST_BLOCKS) ? 5 : 0);
ncols = (int) (terminal_width / column_width);
}
@@ -618,6 +820,8 @@ static off_t calculate_blocks(struct dnode **dn)
#endif
+static struct dnode **list_dir(const char *, unsigned *);
+
static void showdirs(struct dnode **dn, int first)
{
unsigned nfiles;
@@ -733,188 +937,6 @@ static struct dnode **list_dir(const char *path, unsigned *nfiles_p)
}
-static int print_name(const char *name)
-{
- if (option_mask32 & OPT_Q) {
-#if ENABLE_FEATURE_ASSUME_UNICODE
- unsigned len = 2 + unicode_strlen(name);
-#else
- unsigned len = 2;
-#endif
- putchar('"');
- while (*name) {
- if (*name == '"') {
- putchar('\\');
- len++;
- }
- putchar(*name++);
- if (!ENABLE_FEATURE_ASSUME_UNICODE)
- len++;
- }
- putchar('"');
- return len;
- }
- /* No -Q: */
-#if ENABLE_FEATURE_ASSUME_UNICODE
- fputs(name, stdout);
- return unicode_strlen(name);
-#else
- return printf("%s", name);
-#endif
-}
-
-
-static NOINLINE unsigned list_single(const struct dnode *dn)
-{
- unsigned column = 0;
- char *lpath = lpath; /* for compiler */
-#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
- struct stat info;
- char append;
-#endif
-
- /* Never happens:
- if (dn->fullname == NULL)
- return 0;
- */
-
-#if ENABLE_FEATURE_LS_FILETYPES
- append = append_char(dn->dstat.st_mode);
-#endif
-
- /* Do readlink early, so that if it fails, error message
- * does not appear *inside* the "ls -l" line */
- if (all_fmt & LIST_SYMLINK)
- if (S_ISLNK(dn->dstat.st_mode))
- lpath = xmalloc_readlink_or_warn(dn->fullname);
-
- if (all_fmt & LIST_INO)
- column += printf("%7llu ", (long long) dn->dstat.st_ino);
- if (all_fmt & LIST_BLOCKS)
- column += printf("%4"OFF_FMT"u ", (off_t) (dn->dstat.st_blocks >> 1));
- if (all_fmt & LIST_MODEBITS)
- column += printf("%-10s ", (char *) bb_mode_string(dn->dstat.st_mode));
- if (all_fmt & LIST_NLINKS)
- column += printf("%4lu ", (long) dn->dstat.st_nlink);
-#if ENABLE_FEATURE_LS_USERNAME
- if (all_fmt & LIST_ID_NAME) {
- if (option_mask32 & OPT_g) {
- column += printf("%-8.8s ",
- get_cached_username(dn->dstat.st_uid));
- } else {
- column += printf("%-8.8s %-8.8s ",
- get_cached_username(dn->dstat.st_uid),
- get_cached_groupname(dn->dstat.st_gid));
- }
- }
-#endif
- if (all_fmt & LIST_ID_NUMERIC) {
- if (option_mask32 & OPT_g)
- column += printf("%-8u ", (int) dn->dstat.st_uid);
- else
- column += printf("%-8u %-8u ",
- (int) dn->dstat.st_uid,
- (int) dn->dstat.st_gid);
- }
- if (all_fmt & (LIST_SIZE /*|LIST_DEV*/ )) {
- if (S_ISBLK(dn->dstat.st_mode) || S_ISCHR(dn->dstat.st_mode)) {
- column += printf("%4u, %3u ",
- (int) major(dn->dstat.st_rdev),
- (int) minor(dn->dstat.st_rdev));
- } else {
- if (all_fmt & LS_DISP_HR) {
- column += printf("%"HUMAN_READABLE_MAX_WIDTH_STR"s ",
- /* print st_size, show one fractional, use suffixes */
- make_human_readable_str(dn->dstat.st_size, 1, 0)
- );
- } else {
- column += printf("%9"OFF_FMT"u ", (off_t) dn->dstat.st_size);
- }
- }
- }
-#if ENABLE_FEATURE_LS_TIMESTAMPS
- if (all_fmt & (LIST_FULLTIME|LIST_DATE_TIME)) {
- char *filetime;
- time_t ttime = dn->dstat.st_mtime;
- if (all_fmt & TIME_ACCESS)
- ttime = dn->dstat.st_atime;
- if (all_fmt & TIME_CHANGE)
- ttime = dn->dstat.st_ctime;
- filetime = ctime(&ttime);
- /* filetime's format: "Wed Jun 30 21:49:08 1993\n" */
- if (all_fmt & LIST_FULLTIME)
- column += printf("%.24s ", filetime);
- else { /* LIST_DATE_TIME */
- /* current_time_t ~== time(NULL) */
- time_t age = current_time_t - ttime;
- printf("%.6s ", filetime + 4); /* "Jun 30" */
- if (age < 3600L * 24 * 365 / 2 && age > -15 * 60) {
- /* hh:mm if less than 6 months old */
- printf("%.5s ", filetime + 11);
- } else { /* year. buggy if year > 9999 ;) */
- printf(" %.4s ", filetime + 20);
- }
- column += 13;
- }
- }
-#endif
-#if ENABLE_SELINUX
- if (all_fmt & LIST_CONTEXT) {
- column += printf("%-32s ", dn->sid ? dn->sid : "unknown");
- freecon(dn->sid);
- }
-#endif
- if (all_fmt & LIST_FILENAME) {
-#if ENABLE_FEATURE_LS_COLOR
- if (show_color) {
- info.st_mode = 0; /* for fgcolor() */
- lstat(dn->fullname, &info);
- printf("\033[%u;%um", bold(info.st_mode),
- fgcolor(info.st_mode));
- }
-#endif
- column += print_name(dn->name);
- if (show_color) {
- printf("\033[0m");
- }
- }
- if (all_fmt & LIST_SYMLINK) {
- if (S_ISLNK(dn->dstat.st_mode) && lpath) {
- printf(" -> ");
-#if ENABLE_FEATURE_LS_FILETYPES || ENABLE_FEATURE_LS_COLOR
-#if ENABLE_FEATURE_LS_COLOR
- info.st_mode = 0; /* for fgcolor() */
-#endif
- if (stat(dn->fullname, &info) == 0) {
- append = append_char(info.st_mode);
- }
-#endif
-#if ENABLE_FEATURE_LS_COLOR
- if (show_color) {
- printf("\033[%u;%um", bold(info.st_mode),
- fgcolor(info.st_mode));
- }
-#endif
- column += print_name(lpath) + 4;
- if (show_color) {
- printf("\033[0m");
- }
- free(lpath);
- }
- }
-#if ENABLE_FEATURE_LS_FILETYPES
- if (all_fmt & LIST_FILETYPE) {
- if (append) {
- putchar(append);
- column++;
- }
- }
-#endif
-
- return column;
-}
-
-
int ls_main(int argc UNUSED_PARAM, char **argv)
{
struct dnode **dnd;
diff --git a/include/libbb.h b/include/libbb.h
index 73aea409e..a86d64400 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -577,11 +577,6 @@ char *strncpy_IFNAMSIZ(char *dst, const char *src) FAST_FUNC;
* But potentially slow, don't use in one-billion-times loops */
int bb_putchar(int ch) FAST_FUNC;
char *xasprintf(const char *format, ...) __attribute__ ((format(printf, 1, 2))) FAST_FUNC RETURNS_MALLOC;
-/* Prints unprintable chars ch as ^C or M-c to file
- * (M-c is used only if ch is ORed with PRINTABLE_META),
- * else it is printed as-is (except for ch = 0x9b) */
-enum { PRINTABLE_META = 0x100 };
-void fputc_printable(int ch, FILE *file) FAST_FUNC;
// gcc-4.1.1 still isn't good enough at optimizing it
// (+200 bytes compared to macro)
//static ALWAYS_INLINE
@@ -594,6 +589,20 @@ void fputc_printable(int ch, FILE *file) FAST_FUNC;
#define NOT_LONE_CHAR(s,c) ((s)[0] != (c) || (s)[1])
#define DOT_OR_DOTDOT(s) ((s)[0] == '.' && (!(s)[1] || ((s)[1] == '.' && !(s)[2])))
+typedef struct uni_stat_t {
+ unsigned byte_count; /* length in bytes; printable_string() stores the byte length of its result here */
+ unsigned unicode_count; /* presumably the number of characters - confirm against unicode.c */
+ unsigned unicode_width; /* width in screen columns; ls uses it to size and fill its columns */
+} uni_stat_t;
+/* Returns a string with unprintable chars replaced by '?' or
+ * SUBST_WCHAR. This function is unicode-aware.
+ * The result is either str itself (when nothing needed replacing)
+ * or a buffer owned by printable_string(): callers must not free it.
+ * Only the four most recent converted results are kept; the oldest
+ * one is freed when a new conversion is made.
+ * stats receives the counts for the returned string. The non-unicode
+ * paths accept stats == NULL; whether the unicode conversion does
+ * is not visible here - confirm before passing NULL. */
+const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str);
+/* Prints unprintable char ch as ^C or M-c to file
+ * (M-c is used only if ch is ORed with PRINTABLE_META),
+ * else it is printed as-is (except for ch = 0x9b) */
+enum { PRINTABLE_META = 0x100 };
+void fputc_printable(int ch, FILE *file) FAST_FUNC;
+
/* dmalloc will redefine these to it's own implementation. It is safe
* to have the prototypes here unconditionally. */
void *malloc_or_warn(size_t size) FAST_FUNC RETURNS_MALLOC;
diff --git a/include/unicode.h b/include/unicode.h
index f32e56599..25ef7407e 100644
--- a/include/unicode.h
+++ b/include/unicode.h
@@ -23,11 +23,6 @@ size_t FAST_FUNC unicode_strlen(const char *string);
enum {
UNI_FLAG_PAD = (1 << 0),
};
-typedef struct uni_stat_t {
- unsigned byte_count;
- unsigned unicode_count;
- unsigned unicode_width;
-} uni_stat_t;
//UNUSED: unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src);
//UNUSED: char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags);
char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src);
diff --git a/libbb/Kbuild b/libbb/Kbuild
index 243626d67..7e793109e 100644
--- a/libbb/Kbuild
+++ b/libbb/Kbuild
@@ -73,6 +73,7 @@ lib-y += perror_nomsg_and_die.o
lib-y += pidfile.o
lib-y += platform.o
lib-y += printable.o
+lib-y += printable_string.o
lib-y += print_flags.o
lib-y += process_escape_sequence.o
lib-y += procps.o
diff --git a/libbb/printable_string.c b/libbb/printable_string.c
new file mode 100644
index 000000000..47565de0d
--- /dev/null
+++ b/libbb/printable_string.c
@@ -0,0 +1,65 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Unicode support routines.
+ *
+ * Copyright (C) 2010 Denys Vlasenko
+ *
+ * Licensed under GPL version 2, see file LICENSE in this tarball for details.
+ */
+#include "libbb.h"
+#include "unicode.h"
+
+const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
+{
+ static char *saved[4]; /* the last four converted strings; a slot is freed when it is reused */
+ static unsigned cur_saved; /* = 0 */
+ /* Fast path first: scan for a byte outside 0x20..0x7e. If there is
+  * none, str is returned unchanged and nothing is allocated. */
+ char *dst;
+ const char *s;
+
+ s = str;
+ while (1) {
+ unsigned char c = *s;
+ if (c == '\0') {
+ /* 99+% of inputs do not need conversion */
+ if (stats) {
+ stats->byte_count = (s - str); /* pure printable ASCII: all three counts equal the byte length */
+ stats->unicode_count = (s - str);
+ stats->unicode_width = (s - str);
+ }
+ return str;
+ }
+ if (c < ' ') /* control character */
+ break;
+ if (c >= 0x7f) /* DEL or a byte with the high bit set */
+ break;
+ s++;
+ }
+
+#if ENABLE_FEATURE_ASSUME_UNICODE
+ dst = unicode_conv_to_printable(stats, str); /* presumably returns malloc-ed memory, since it is passed to free() later - confirm */
+#else
+ {
+ char *d = dst = xstrdup(str);
+ while (1) {
+ unsigned char c = *d;
+ if (c == '\0')
+ break;
+ if (c < ' ' || c >= 0x7f)
+ *d = '?'; /* one byte in, one byte out: length is unchanged */
+ d++;
+ }
+ if (stats) {
+ stats->byte_count = (d - dst);
+ stats->unicode_count = (d - dst);
+ stats->unicode_width = (d - dst);
+ }
+ }
+#endif
+
+ free(saved[cur_saved]); /* drop the oldest result; unused slots of the static array are NULL */
+ saved[cur_saved] = dst;
+ cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); /* wrap by mask: relies on the array size being a power of two */
+
+ return dst;
+}
diff --git a/testsuite/ls.mk_uni_tests b/testsuite/ls.mk_uni_tests
new file mode 100644
index 000000000..da0c29f29
--- /dev/null
+++ b/testsuite/ls.mk_uni_tests
@@ -0,0 +1,111 @@
+# DO NOT EDIT THIS FILE! MOST TEXT EDITORS WILL DAMAGE IT!
+>'0001_1__Some_correct_UTF-8_text___________________________________________|'
+>'0002_2__Boundary_condition_test_cases_____________________________________|'
+>'0003_2.1__First_possible_sequence_of_a_certain_length_____________________|'
+>'0004_2.1.2__2_bytes__U-00000080_:________"€"______________________________|'
+>'0005_2.1.3__3_bytes__U-00000800_:________"ࠀ"______________________________|'
+>'0006_2.1.4__4_bytes__U-00010000_:________"𐀀"______________________________|'
+>'0007_2.1.5__5_bytes__U-00200000_:________""______________________________|'
+>'0008_2.1.6__6_bytes__U-04000000_:________""______________________________|'
+>'0009_2.2__Last_possible_sequence_of_a_certain_length______________________|'
+>'0010_2.2.1__1_byte___U-0000007F_:________""______________________________|'
+>'0011_2.2.2__2_bytes__U-000007FF_:________"߿"______________________________|'
+>'0012_2.2.3__3_bytes__U-0000FFFF_:________"￿"______________________________|'
+>'0013_2.2.4__4_bytes__U-001FFFFF_:________""______________________________|'
+>'0014_2.2.5__5_bytes__U-03FFFFFF_:________""______________________________|'
+>'0015_2.2.6__6_bytes__U-7FFFFFFF_:________""______________________________|'
+>'0016_2.3__Other_boundary_conditions_______________________________________|'
+>'0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"퟿"___________________________________|'
+>'0018_2.3.2__U-0000E000_=_ee_80_80_=_""___________________________________|'
+>'0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"�"___________________________________|'
+>'0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"􏿿"________________________________|'
+>'0021_2.3.5__U-00110000_=_f4_90_80_80_=_""________________________________|'
+>'0022_3__Malformed_sequences_______________________________________________|'
+>'0023_3.1__Unexpected_continuation_bytes___________________________________|'
+>'0024_3.1.1__First_continuation_byte_0x80:_""_____________________________|'
+>'0025_3.1.2__Last__continuation_byte_0xbf:_""_____________________________|'
+>'0026_3.1.3__2_continuation_bytes:_""____________________________________|'
+>'0027_3.1.4__3_continuation_bytes:_""___________________________________|'
+>'0028_3.1.5__4_continuation_bytes:_""__________________________________|'
+>'0029_3.1.6__5_continuation_bytes:_""_________________________________|'
+>'0030_3.1.7__6_continuation_bytes:_""________________________________|'
+>'0031_3.1.8__7_continuation_bytes:_""_______________________________|'
+>'0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|'
+>'0033____"_________________________________________________|'
+>'0034______________________________________________________|'
+>'0035______________________________________________________|'
+>'0036_____"________________________________________________|'
+>'0037_3.2__Lonely_start_characters_________________________________________|'
+>'0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|'
+>'0039________each_followed_by_a_space_character:___________________________|'
+>'0040____"_________________________________________________|'
+>'0041_____________________"________________________________|'
+>'0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|'
+>'0043________each_followed_by_a_space_character:___________________________|'
+>'0044____"________________"________________________________|'
+>'0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|'
+>'0046________each_followed_by_a_space_character:___________________________|'
+>'0047____"________"________________________________________________|'
+>'0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|'
+>'0049________each_followed_by_a_space_character:___________________________|'
+>'0050____"____"________________________________________________________|'
+>'0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|'
+>'0052________each_followed_by_a_space_character:___________________________|'
+>'0053____"__"____________________________________________________________|'
+>'0054_3.3__Sequences_with_last_continuation_byte_missing___________________|'
+>'0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
+>'0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
+>'0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
+>'0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
+>'0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____""______|'
+>'0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_""______|'
+>'0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_""______|'
+>'0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_""______|'
+>'0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_""______|'
+>'0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_""______|'
+>'0065_3.4__Concatenation_of_incomplete_sequences___________________________|'
+>'0066____""______________________________________________________|'
+>'0067_3.5__Impossible_bytes________________________________________________|'
+>'0068_3.5.1__fe_=_""______________________________________________________|'
+>'0069_3.5.2__ff_=_""______________________________________________________|'
+>'0070_3.5.3__fe_fe_ff_ff_=_""__________________________________________|'
+>'0071_4__Overlong_sequences________________________________________________|'
+>'0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|'
+>'0073_4.1.1_U+002F_=_c0_af_____________=_""_______________________________|'
+>'0074_4.1.2_U+002F_=_e0_80_af__________=_""_______________________________|'
+>'0075_4.1.3_U+002F_=_f0_80_80_af_______=_""_______________________________|'
+>'0076_4.1.4_U+002F_=_f8_80_80_80_af____=_""_______________________________|'
+>'0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_""_______________________________|'
+>'0078_4.2__Maximum_overlong_sequences______________________________________|'
+>'0079_4.2.1__U-0000007F_=_c1_bf_____________=_""__________________________|'
+>'0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_""__________________________|'
+>'0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_""__________________________|'
+>'0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_""__________________________|'
+>'0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_""__________________________|'
+>'0084_4.3__Overlong_representation_of_the_NUL_character____________________|'
+>'0085_4.3.1__U+0000_=_c0_80_____________=_""______________________________|'
+>'0086_4.3.2__U+0000_=_e0_80_80__________=_""______________________________|'
+>'0087_4.3.3__U+0000_=_f0_80_80_80_______=_""______________________________|'
+>'0088_4.3.4__U+0000_=_f8_80_80_80_80____=_""______________________________|'
+>'0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_""______________________________|'
+>'0090_5__Illegal_code_positions____________________________________________|'
+>'0091_5.1_Single_UTF-16_surrogates_________________________________________|'
+>'0092_5.1.1__U+D800_=_ed_a0_80_=_""_______________________________________|'
+>'0093_5.1.2__U+DB7F_=_ed_ad_bf_=_""_______________________________________|'
+>'0094_5.1.3__U+DB80_=_ed_ae_80_=_""_______________________________________|'
+>'0095_5.1.4__U+DBFF_=_ed_af_bf_=_""_______________________________________|'
+>'0096_5.1.5__U+DC00_=_ed_b0_80_=_""_______________________________________|'
+>'0097_5.1.6__U+DF80_=_ed_be_80_=_""_______________________________________|'
+>'0098_5.1.7__U+DFFF_=_ed_bf_bf_=_""_______________________________________|'
+>'0099_5.2_Paired_UTF-16_surrogates_________________________________________|'
+>'0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_""______________________|'
+>'0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_""______________________|'
+>'0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_""______________________|'
+>'0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_""______________________|'
+>'0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_""______________________|'
+>'0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_""______________________|'
+>'0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_""______________________|'
+>'0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_""______________________|'
+>'0108_5.3_Other_illegal_code_positions_____________________________________|'
+>'0109_5.3.1__U+FFFE_=_ef_bf_be_=_"￾"_______________________________________|'
+>'0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"￿"_______________________________________|'
diff --git a/testsuite/ls.tests b/testsuite/ls.tests
new file mode 100755
index 000000000..b0c5da7f9
--- /dev/null
+++ b/testsuite/ls.tests
@@ -0,0 +1,136 @@
+#!/bin/sh
+# Copyright 2010 by Denys Vlasenko
+# Licensed under GPL v2, see file LICENSE for details.
+
+. ./testing.sh
+
+test -f "$bindir/.config" && . "$bindir/.config"
+
+rm -rf ls.testdir >/dev/null
+mkdir ls.testdir || exit 1
+
+# testing "test name" "command" "expected result" "file input" "stdin"
+
+# This test does not pass cleanly yet: in fully correct output
+# every | char would line up in the same column.
+test x"$CONFIG_FEATURE_ASSUME_UNICODE" = x"y" \
+&& test x"$CONFIG_SUBST_WCHAR" = x"63" \
+&& test x"$CONFIG_LAST_SUPPORTED_WCHAR" = x"767" \
+&& testing "ls unicode test" \
+"(cd ls.testdir && sh ../ls.mk_uni_tests) && ls -1 ls.testdir" \
+'0001_1__Some_correct_UTF-8_text___________________________________________|
+0002_2__Boundary_condition_test_cases_____________________________________|
+0003_2.1__First_possible_sequence_of_a_certain_length_____________________|
+0004_2.1.2__2_bytes__U-00000080_:________"?"______________________________|
+0005_2.1.3__3_bytes__U-00000800_:________"?"______________________________|
+0006_2.1.4__4_bytes__U-00010000_:________"?"______________________________|
+0007_2.1.5__5_bytes__U-00200000_:________"?"______________________________|
+0008_2.1.6__6_bytes__U-04000000_:________"?"______________________________|
+0009_2.2__Last_possible_sequence_of_a_certain_length______________________|
+0010_2.2.1__1_byte___U-0000007F_:________"?"______________________________|
+0011_2.2.2__2_bytes__U-000007FF_:________"?"______________________________|
+0012_2.2.3__3_bytes__U-0000FFFF_:________"?"______________________________|
+0013_2.2.4__4_bytes__U-001FFFFF_:________"?"______________________________|
+0014_2.2.5__5_bytes__U-03FFFFFF_:________"?"______________________________|
+0015_2.2.6__6_bytes__U-7FFFFFFF_:________"?"______________________________|
+0016_2.3__Other_boundary_conditions_______________________________________|
+0017_2.3.1__U-0000D7FF_=_ed_9f_bf_=_"?"___________________________________|
+0018_2.3.2__U-0000E000_=_ee_80_80_=_"?"___________________________________|
+0019_2.3.3__U-0000FFFD_=_ef_bf_bd_=_"?"___________________________________|
+0020_2.3.4__U-0010FFFF_=_f4_8f_bf_bf_=_"?"________________________________|
+0021_2.3.5__U-00110000_=_f4_90_80_80_=_"?"________________________________|
+0022_3__Malformed_sequences_______________________________________________|
+0023_3.1__Unexpected_continuation_bytes___________________________________|
+0024_3.1.1__First_continuation_byte_0x80:_"?"_____________________________|
+0025_3.1.2__Last__continuation_byte_0xbf:_"?"_____________________________|
+0026_3.1.3__2_continuation_bytes:_"??"____________________________________|
+0027_3.1.4__3_continuation_bytes:_"???"___________________________________|
+0028_3.1.5__4_continuation_bytes:_"????"__________________________________|
+0029_3.1.6__5_continuation_bytes:_"?????"_________________________________|
+0030_3.1.7__6_continuation_bytes:_"??????"________________________________|
+0031_3.1.8__7_continuation_bytes:_"???????"_______________________________|
+0032_3.1.9__Sequence_of_all_64_possible_continuation_bytes__0x80-0xbf_:___|
+0033____"????????????????_________________________________________________|
+0034_____????????????????_________________________________________________|
+0035_____????????????????_________________________________________________|
+0036_____????????????????"________________________________________________|
+0037_3.2__Lonely_start_characters_________________________________________|
+0038_3.2.1__All_32_first_bytes_of_2-byte_sequences__0xc0-0xdf_,___________|
+0039________each_followed_by_a_space_character:___________________________|
+0040____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?__________________________________|
+0041_____?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
+0042_3.2.2__All_16_first_bytes_of_3-byte_sequences__0xe0-0xef_,___________|
+0043________each_followed_by_a_space_character:___________________________|
+0044____"?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_?_"________________________________|
+0045_3.2.3__All_8_first_bytes_of_4-byte_sequences__0xf0-0xf7_,____________|
+0046________each_followed_by_a_space_character:___________________________|
+0047____"?_?_?_?_?_?_?_?_"________________________________________________|
+0048_3.2.4__All_4_first_bytes_of_5-byte_sequences__0xf8-0xfb_,____________|
+0049________each_followed_by_a_space_character:___________________________|
+0050____"?_?_?_?_"________________________________________________________|
+0051_3.2.5__All_2_first_bytes_of_6-byte_sequences__0xfc-0xfd_,____________|
+0052________each_followed_by_a_space_character:___________________________|
+0053____"?_?_"____________________________________________________________|
+0054_3.3__Sequences_with_last_continuation_byte_missing___________________|
+0055_3.3.1__2-byte_sequence_with_last_byte_missing__U+0000_:_____"?"______|
+0056_3.3.2__3-byte_sequence_with_last_byte_missing__U+0000_:_____"??"______|
+0057_3.3.3__4-byte_sequence_with_last_byte_missing__U+0000_:_____"???"______|
+0058_3.3.4__5-byte_sequence_with_last_byte_missing__U+0000_:_____"????"______|
+0059_3.3.5__6-byte_sequence_with_last_byte_missing__U+0000_:_____"?????"______|
+0060_3.3.6__2-byte_sequence_with_last_byte_missing__U-000007FF_:_"?"______|
+0061_3.3.7__3-byte_sequence_with_last_byte_missing__U-0000FFFF_:_"??"______|
+0062_3.3.8__4-byte_sequence_with_last_byte_missing__U-001FFFFF_:_"???"______|
+0063_3.3.9__5-byte_sequence_with_last_byte_missing__U-03FFFFFF_:_"????"______|
+0064_3.3.10_6-byte_sequence_with_last_byte_missing__U-7FFFFFFF_:_"?????"______|
+0065_3.4__Concatenation_of_incomplete_sequences___________________________|
+0066____"??????????????????????????????"______________________________________________________|
+0067_3.5__Impossible_bytes________________________________________________|
+0068_3.5.1__fe_=_"?"______________________________________________________|
+0069_3.5.2__ff_=_"?"______________________________________________________|
+0070_3.5.3__fe_fe_ff_ff_=_"????"__________________________________________|
+0071_4__Overlong_sequences________________________________________________|
+0072_4.1__Examples_of_an_overlong_ASCII_character_________________________|
+0073_4.1.1_U+002F_=_c0_af_____________=_"??"_______________________________|
+0074_4.1.2_U+002F_=_e0_80_af__________=_"???"_______________________________|
+0075_4.1.3_U+002F_=_f0_80_80_af_______=_"????"_______________________________|
+0076_4.1.4_U+002F_=_f8_80_80_80_af____=_"?????"_______________________________|
+0077_4.1.5_U+002F_=_fc_80_80_80_80_af_=_"??????"_______________________________|
+0078_4.2__Maximum_overlong_sequences______________________________________|
+0079_4.2.1__U-0000007F_=_c1_bf_____________=_"??"__________________________|
+0080_4.2.2__U-000007FF_=_e0_9f_bf__________=_"?"__________________________|
+0081_4.2.3__U-0000FFFF_=_f0_8f_bf_bf_______=_"?"__________________________|
+0082_4.2.4__U-001FFFFF_=_f8_87_bf_bf_bf____=_"?"__________________________|
+0083_4.2.5__U-03FFFFFF_=_fc_83_bf_bf_bf_bf_=_"?"__________________________|
+0084_4.3__Overlong_representation_of_the_NUL_character____________________|
+0085_4.3.1__U+0000_=_c0_80_____________=_"??"______________________________|
+0086_4.3.2__U+0000_=_e0_80_80__________=_"???"______________________________|
+0087_4.3.3__U+0000_=_f0_80_80_80_______=_"????"______________________________|
+0088_4.3.4__U+0000_=_f8_80_80_80_80____=_"?????"______________________________|
+0089_4.3.5__U+0000_=_fc_80_80_80_80_80_=_"??????"______________________________|
+0090_5__Illegal_code_positions____________________________________________|
+0091_5.1_Single_UTF-16_surrogates_________________________________________|
+0092_5.1.1__U+D800_=_ed_a0_80_=_"?"_______________________________________|
+0093_5.1.2__U+DB7F_=_ed_ad_bf_=_"?"_______________________________________|
+0094_5.1.3__U+DB80_=_ed_ae_80_=_"?"_______________________________________|
+0095_5.1.4__U+DBFF_=_ed_af_bf_=_"?"_______________________________________|
+0096_5.1.5__U+DC00_=_ed_b0_80_=_"?"_______________________________________|
+0097_5.1.6__U+DF80_=_ed_be_80_=_"?"_______________________________________|
+0098_5.1.7__U+DFFF_=_ed_bf_bf_=_"?"_______________________________________|
+0099_5.2_Paired_UTF-16_surrogates_________________________________________|
+0100_5.2.1__U+D800_U+DC00_=_ed_a0_80_ed_b0_80_=_"??"______________________|
+0101_5.2.2__U+D800_U+DFFF_=_ed_a0_80_ed_bf_bf_=_"??"______________________|
+0102_5.2.3__U+DB7F_U+DC00_=_ed_ad_bf_ed_b0_80_=_"??"______________________|
+0103_5.2.4__U+DB7F_U+DFFF_=_ed_ad_bf_ed_bf_bf_=_"??"______________________|
+0104_5.2.5__U+DB80_U+DC00_=_ed_ae_80_ed_b0_80_=_"??"______________________|
+0105_5.2.6__U+DB80_U+DFFF_=_ed_ae_80_ed_bf_bf_=_"??"______________________|
+0106_5.2.7__U+DBFF_U+DC00_=_ed_af_bf_ed_b0_80_=_"??"______________________|
+0107_5.2.8__U+DBFF_U+DFFF_=_ed_af_bf_ed_bf_bf_=_"??"______________________|
+0108_5.3_Other_illegal_code_positions_____________________________________|
+0109_5.3.1__U+FFFE_=_ef_bf_be_=_"?"_______________________________________|
+0110_5.3.2__U+FFFF_=_ef_bf_bf_=_"?"_______________________________________|
+' "" ""
+
+# Clean up
+rm -rf ls.testdir 2>/dev/null
+
+exit $FAILCOUNT