From af53c56ec8cb59ab4a2569a8aa00ec8de753b36c Mon Sep 17 00:00:00 2001
From: Benno Schulenberg <bensberg@justemail.net>
Date: Wed, 29 Jun 2016 20:48:04 +0200
Subject: [PATCH] chars: speed up the determination whether something is a
 control character

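Use knowledge of UTF-8 instead of converting to wide characters first:
a control character is either a single byte below 0x20, the byte 0x7F,
or the two-byte sequence 0xC2 followed by a byte from 0x80 to 0x9F.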
---
 src/chars.c | 23 +++--------------------
 src/proto.h |  3 ---
 2 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/src/chars.c b/src/chars.c
index 02c6fa31..61fa6e69 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -150,20 +150,9 @@ bool is_ascii_cntrl_char(int c)
  * handles high-bit control characters. */
 bool is_cntrl_char(int c)
 {
-    return (-128 <= c && c < -96) || (0 <= c && c < 32) ||
-	(127 <= c && c < 160);
+    return ((c & 0x60) == 0 || c == 127);
 }
 
-#ifdef ENABLE_UTF8
-/* This function is equivalent to iscntrl() for wide characters, except
- * in that it also handles wide control characters with their high bits
- * set. */
-bool is_cntrl_wchar(wchar_t wc)
-{
-    return (0 <= wc && wc < 32) || (127 <= wc && wc < 160);
-}
-#endif
-
 /* This function is equivalent to iscntrl() for multibyte characters,
  * except in that it also handles multibyte control characters with
  * their high bits set. */
@@ -173,14 +162,8 @@ bool is_cntrl_mbchar(const char *c)
 
 #ifdef ENABLE_UTF8
     if (use_utf8) {
-	wchar_t wc;
-
-	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
-	    mbtowc_reset();
-	    wc = bad_wchar;
-	}
-
-	return is_cntrl_wchar(wc);
+	return ((c[0] & 0xE0) == 0 || c[0] == 127 ||
+		((signed char)c[0] == -62 && (signed char)c[1] < -96));
     } else
 #endif
 	return is_cntrl_char((unsigned char)*c);
diff --git a/src/proto.h b/src/proto.h
index 806ba858..c3386af5 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -183,9 +183,6 @@ bool is_alnum_mbchar(const char *c);
 bool is_blank_mbchar(const char *c);
 bool is_ascii_cntrl_char(int c);
 bool is_cntrl_char(int c);
-#ifdef ENABLE_UTF8
-bool is_cntrl_wchar(wchar_t wc);
-#endif
 bool is_cntrl_mbchar(const char *c);
 bool is_punct_mbchar(const char *c);
 bool is_word_mbchar(const char *c, bool allow_punct);
-- 
GitLab