From d2b04050c0a9a15e29e15cbf9c487db93d07c46e Mon Sep 17 00:00:00 2001
From: Tomas Heinrich <heinrich.tomas@gmail.com>
Date: Tue, 9 Mar 2010 14:09:24 +0100
Subject: lineedit: invalid unicode characters are replaced with
 CONFIG_SUBST_WCHAR

function                                             old     new   delta
read_key_ungets                                        -      50     +50
lineedit_read_key                                    223     252     +29

Signed-off-by: Tomas Heinrich <heinrich.tomas@gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
---
 libbb/lineedit.c | 26 +++++++++++++++++++++-----
 libbb/read_key.c |  9 +++++++++
 2 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'libbb')

diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index c50b31d67..8e339da53 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -1700,18 +1700,34 @@ static int lineedit_read_key(char *read_key_buffer)
 #endif
 
 #if ENABLE_FEATURE_ASSUME_UNICODE
-		{
+		if (unicode_status == UNICODE_ON) {
 			wchar_t wc;
 
 			if ((int32_t)ic < 0) /* KEYCODE_xxx */
 				return ic;
+			// TODO: imagine sequence like: 0xff, <left-arrow>: we are currently losing 0xff...
+
 			unicode_buf[unicode_idx++] = ic;
 			unicode_buf[unicode_idx] = '\0';
-			if (mbstowcs(&wc, unicode_buf, 1) != 1 && unicode_idx < MB_CUR_MAX) {
-				delay = 50;
-				goto poll_again;
+			if (mbstowcs(&wc, unicode_buf, 1) != 1) {
+				/* Not (yet?) a valid unicode char */
+				if (unicode_idx < MB_CUR_MAX) {
+					delay = 50;
+					goto poll_again;
+				}
+				/* Invalid sequence. Save all "bad bytes" except first */
+				read_key_ungets(read_key_buffer, unicode_buf + 1, MB_CUR_MAX - 1);
+				/*
+				 * ic = unicode_buf[0] sounds even better, but currently
+				 * this does not work: wchar_t[] -> char[] conversion
+				 * when lineedit finishes mangles such "raw bytes"
+				 * (by misinterpreting them as unicode chars):
+				 */
+				ic = CONFIG_SUBST_WCHAR;
+			} else {
+				/* Valid unicode char, return its code */
+				ic = wc;
 			}
-			ic = wc;
 		}
 #endif
 	} while (errno == EAGAIN);
diff --git a/libbb/read_key.c b/libbb/read_key.c
index a2253ce3e..98b3131de 100644
--- a/libbb/read_key.c
+++ b/libbb/read_key.c
@@ -246,3 +246,12 @@ int64_t FAST_FUNC read_key(int fd, char *buffer)
 	buffer[-1] = 0;
 	goto start_over;
 }
+
+void FAST_FUNC read_key_ungets(char *buffer, const char *str, unsigned len)
+{	/* Append len bytes of str after the bytes already pending in buffer */
+	unsigned cur_len = (unsigned char)buffer[0]; /* buffer[0] is used as the pending-byte count */
+	if (len > KEYCODE_BUFFER_SIZE-1 - cur_len) /* clamp to the space left after the count byte */
+		len = KEYCODE_BUFFER_SIZE-1 - cur_len;
+	memcpy(buffer + 1 + cur_len, str, len);
+	buffer[0] += len; /* buffer[0] already equals cur_len: add only the new bytes */
+}
-- 
cgit v1.2.3