From af53c56ec8cb59ab4a2569a8aa00ec8de753b36c Mon Sep 17 00:00:00 2001 From: Benno Schulenberg <bensberg@justemail.net> Date: Wed, 29 Jun 2016 20:48:04 +0200 Subject: [PATCH] chars: speed up the determination whether something is a control character Use knowledge of UTF-8 instead of converting to wide characters first. --- src/chars.c | 23 +++-------------------- src/proto.h | 3 --- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/src/chars.c b/src/chars.c index 02c6fa31..61fa6e69 100644 --- a/src/chars.c +++ b/src/chars.c @@ -150,20 +150,9 @@ bool is_ascii_cntrl_char(int c) * handles high-bit control characters. */ bool is_cntrl_char(int c) { - return (-128 <= c && c < -96) || (0 <= c && c < 32) || - (127 <= c && c < 160); + return ((c & 0x60) == 0 || c == 127); } -#ifdef ENABLE_UTF8 -/* This function is equivalent to iscntrl() for wide characters, except - * in that it also handles wide control characters with their high bits - * set. */ -bool is_cntrl_wchar(wchar_t wc) -{ - return (0 <= wc && wc < 32) || (127 <= wc && wc < 160); -} -#endif - /* This function is equivalent to iscntrl() for multibyte characters, * except in that it also handles multibyte control characters with * their high bits set. */ @@ -173,14 +162,8 @@ bool is_cntrl_mbchar(const char *c) #ifdef ENABLE_UTF8 if (use_utf8) { - wchar_t wc; - - if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { - mbtowc_reset(); - wc = bad_wchar; - } - - return is_cntrl_wchar(wc); + return ((c[0] & 0xE0) == 0 || c[0] == 127 || + ((signed char)c[0] == -62 && (signed char)c[1] < -96)); } else #endif return is_cntrl_char((unsigned char)*c); diff --git a/src/proto.h b/src/proto.h index 806ba858..c3386af5 100644 --- a/src/proto.h +++ b/src/proto.h @@ -183,9 +183,6 @@ bool is_alnum_mbchar(const char *c); bool is_blank_mbchar(const char *c); bool is_ascii_cntrl_char(int c); bool is_cntrl_char(int c); -#ifdef ENABLE_UTF8 -bool is_cntrl_wchar(wchar_t wc); -#endif bool is_cntrl_mbchar(const char *c); bool is_punct_mbchar(const char *c); bool is_word_mbchar(const char *c, bool allow_punct); -- GitLab