unicode: exclude FDD0..FDEF range too

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
author: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-31 16:34:37 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2010-01-31 16:34:37 +0100
commit: b1edf20f1848cd741e8a8395afb4a4655a210906 (patch)
tree: ff6f99354d507ae1bb3bcf29ca99e1626cad0733 /libbb/unicode_wcwidth.c
parent: 40e4e88a28398c49d326b0fdf0d7f100f08b8f8d (diff)
download: busybox-b1edf20f1848cd741e8a8395afb4a4655a210906.tar.gz
1 file changed, 48 insertions, 48 deletions
diff --git a/libbb/unicode_wcwidth.c b/libbb/unicode_wcwidth.c
index 410c741ac..c7cc524a6 100644
--- a/libbb/unicode_wcwidth.c
+++ b/libbb/unicode_wcwidth.c
@@ -90,13 +90,13 @@
  * until Unicode committee assigns something there.
  */
 
-#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR > 0x30000
-# define LAST_SUPPORTED_WCHAR 0x30000
+#if CONFIG_LAST_SUPPORTED_WCHAR < 126 || CONFIG_LAST_SUPPORTED_WCHAR >= 0x30000
+# define LAST_SUPPORTED_WCHAR 0x2ffff
 #else
 # define LAST_SUPPORTED_WCHAR CONFIG_LAST_SUPPORTED_WCHAR
 #endif
 
-#if LAST_SUPPORTED_WCHAR >= 0x0300
+#if LAST_SUPPORTED_WCHAR >= 0x300
 struct interval {
 	uint16_t first;
 	uint16_t last;
@@ -185,7 +185,7 @@ static int in_uint16_table(unsigned ucs, const uint16_t *table, unsigned max)
  */
 static int wcwidth(unsigned ucs)
 {
-#if LAST_SUPPORTED_WCHAR >= 0x0300
+#if LAST_SUPPORTED_WCHAR >= 0x300
 	/* sorted list of non-overlapping intervals of non-spacing characters */
 	/* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
 	static const struct interval combining[] = {
@@ -460,75 +460,75 @@ static int wcwidth(unsigned ucs)
 #undef BIG_
 #undef PAIR
 	};
-# if LAST_SUPPORTED_WCHAR >= 0x10000
-	/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
-	static const struct interval combining0x10000[] = {
-		{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
-		{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
-		{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
-		{ 0xD242, 0xD244 }
-	};
-# endif
 #endif
 
 	if (ucs == 0)
 		return 0;
-	/* test for 8-bit control characters (00-1f, 80-9f, 7f) */
+
+	/* Test for 8-bit control characters (00-1f, 80-9f, 7f) */
 	if ((ucs & ~0x80) < 0x20 || ucs == 0x7f)
 		return -1;
-	if (ucs < 0x0300) /* optimization */
+	/* Quick abort if it is an obviously invalid char */
+	if (ucs > LAST_SUPPORTED_WCHAR)
+		return -1;
+
+	/* Optimization: no combining chars below 0x300 */
+	if (LAST_SUPPORTED_WCHAR < 0x300 || ucs < 0x300)
 		return 1;
 
-#if LAST_SUPPORTED_WCHAR < 0x0300
-	return -1;
-#else
-	/* binary search in table of non-spacing characters */
+#if LAST_SUPPORTED_WCHAR >= 0x300
+	/* Binary search in table of non-spacing characters */
 	if (in_interval_table(ucs, combining, ARRAY_SIZE(combining) - 1))
 		return 0;
 	if (in_uint16_table(ucs, combining1, ARRAY_SIZE(combining1) - 1))
 		return 0;
 
-	if (ucs < 0x1100) /* optimization */
+	/* Optimization: all chars below 0x1100 are not double-width */
+	if (LAST_SUPPORTED_WCHAR < 0x1100 || ucs < 0x1100)
 		return 1;
 
-# if LAST_SUPPORTED_WCHAR < 0x1100
-	return -1;
-# else
-	if (ucs >= LAST_SUPPORTED_WCHAR)
-		return -1;
-
-	/* High (d800..dbff) and low (dc00..dfff) surrogates are invalid (used only by UTF16) */
-	/* We also exclude Private Use Area (e000..f8ff) */
-	if (LAST_SUPPORTED_WCHAR >= 0xd800
-	 && (ucs >= 0xd800 || ucs <= 0xf8ff)
+# if LAST_SUPPORTED_WCHAR >= 0x1100
+	/* Invalid code points: */
+	/* High (d800..dbff) and low (dc00..dfff) surrogates (valid only in UTF16) */
+	/* Private Use Area (e000..f8ff) */
+	/* Noncharacters fdd0..fdef */
+	if ((LAST_SUPPORTED_WCHAR >= 0xd800 && ucs >= 0xd800 && ucs <= 0xf8ff)
+	 || (LAST_SUPPORTED_WCHAR >= 0xfdd0 && ucs >= 0xfdd0 && ucs <= 0xfdef)
 	) {
 		return -1;
 	}
-
 	/* 0xfffe and 0xffff in every plane are invalid */
-	if (LAST_SUPPORTED_WCHAR >= 0xfffe
-	 && (ucs & 0xfffe) == 0xfffe
-	) {
+	if (LAST_SUPPORTED_WCHAR >= 0xfffe && ((ucs & 0xfffe) == 0xfffe)) {
 		return -1;
 	}
 
 #  if LAST_SUPPORTED_WCHAR >= 0x10000
-	/* binary search in table of non-spacing characters in Supplementary Multilingual Plane */
-	if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
-		return 0;
-#  endif
-	/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
-	if (LAST_SUPPORTED_WCHAR >= 0xE0001
-	 && (  ucs == 0xE0001
-	    || (ucs >= 0xE0020 && ucs <= 0xE007F)
-	    || (ucs >= 0xE0100 && ucs <= 0xE01EF)
-	    )
-	) {
-		return 0;
+	if (ucs >= 0x10000) {
+		/* Combining chars in Supplementary Multilingual Plane 0x1xxxx */
+		static const struct interval combining0x10000[] = {
+			{ 0x0A01, 0x0A03 }, { 0x0A05, 0x0A06 }, { 0x0A0C, 0x0A0F },
+			{ 0x0A38, 0x0A3A }, { 0x0A3F, 0x0A3F }, { 0xD167, 0xD169 },
+			{ 0xD173, 0xD182 }, { 0xD185, 0xD18B }, { 0xD1AA, 0xD1AD },
+			{ 0xD242, 0xD244 }
+		};
+		/* Binary search in table of non-spacing characters in Supplementary Multilingual Plane */
+		if (in_interval_table(ucs ^ 0x10000, combining0x10000, ARRAY_SIZE(combining0x10000) - 1))
+			return 0;
+		/* Check a few non-spacing chars in Supplementary Special-purpose Plane 0xExxxx */
+		if (LAST_SUPPORTED_WCHAR >= 0xE0001
+		 && (  ucs == 0xE0001
+		    || (ucs >= 0xE0020 && ucs <= 0xE007F)
+		    || (ucs >= 0xE0100 && ucs <= 0xE01EF)
+		    )
+		) {
+			return 0;
+		}
 	}
+#  endif
 
-	/* if we arrive here, ucs is not a combining or C0/C1 control character */
-
+	/* If we arrive here, ucs is not a combining or C0/C1 control character.
+	 * Check whether it's 1 char or 2-char wide.
+	 */
 	return 1 +
 		(  (/*ucs >= 0x1100 &&*/ ucs <= 0x115f) /* Hangul Jamo init. consonants */
 		|| ucs == 0x2329 /* left-pointing angle bracket; also CJK punct. char */
author	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-31 16:34:37 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2010-01-31 16:34:37 +0100
commit	b1edf20f1848cd741e8a8395afb4a4655a210906 (patch)
tree	ff6f99354d507ae1bb3bcf29ca99e1626cad0733 /libbb/unicode_wcwidth.c
parent	40e4e88a28398c49d326b0fdf0d7f100f08b8f8d (diff)
download	busybox-b1edf20f1848cd741e8a8395afb4a4655a210906.tar.gz