From 0894587305387fec8ab78199a69e3d7abcc21e2f Mon Sep 17 00:00:00 2001
From: Benno Schulenberg <bensberg@justemail.net>
Date: Mon, 6 Jun 2016 12:48:26 +0200
Subject: [PATCH] screen: elide another intermediate buffer for every visible
 character

---
 src/chars.c | 40 ++++++++++++++++------------------------
 src/proto.h |  2 +-
 src/winio.c | 48 ++++++++++++++++++++++++++++++++++--------------
 3 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/src/chars.c b/src/chars.c
index d36faaee..eb070794 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -35,8 +35,6 @@
 
 static bool use_utf8 = FALSE;
 	/* Whether we've enabled UTF-8 support. */
-static const char *const bad_mbchar = "\xEF\xBF\xBD";
-static const int bad_mbchar_len = 3;
 
 /* Enable UTF-8 support. */
 void utf8_init(void)
@@ -230,38 +228,32 @@ char control_mbrep(const char *c)
 	return control_rep(*c);
 }
 
-/* c is a multibyte non-control character.  We return that multibyte
- * character.  If crep is an invalid multibyte sequence, it will be
- * replaced with Unicode 0xFFFD (Replacement Character). */
-char *mbrep(const char *c, char *crep, int *crep_len)
+/* Return the byte length of the (multibyte) character at c; always 1 when
+ * not in UTF-8 mode.  Return -1 if the byte sequence is invalid, and the
+ * byte length minus 8 if the sequence encodes an invalid codepoint. */
+int length_of_char(const char *c)
 {
-    assert(c != NULL && crep != NULL && crep_len != NULL);
+    assert(c != NULL);
 
 #ifdef ENABLE_UTF8
     if (use_utf8) {
 	wchar_t wc;
+	int charlen = mbtowc(&wc, c, MB_CUR_MAX);
 
-	/* Reject invalid Unicode characters. */
-	if (mbtowc(&wc, c, MB_CUR_MAX) < 0 || !is_valid_unicode(wc)) {
+	/* If the sequence is invalid... */
+	if (charlen < 0) {
 	    mbtowc_reset();
-	    *crep_len = bad_mbchar_len;
-	    strncpy(crep, bad_mbchar, *crep_len);
-	} else {
-	    *crep_len = wctomb(crep, wc);
-
-	    if (*crep_len < 0) {
-		wctomb_reset();
-		*crep_len = 0;
-	    }
+	    return -1;
 	}
+
+	/* If the codepoint is invalid... */
+	if (!is_valid_unicode(wc))
+	    return charlen - 8;
+	else
+	    return charlen;
     } else
 #endif
-    {
-	*crep_len = 1;
-	*crep = *c;
-    }
-
-    return crep;
+	return 1;
 }
 
 /* This function is equivalent to wcwidth() for multibyte characters. */
diff --git a/src/proto.h b/src/proto.h
index 3d154210..470df16a 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -188,7 +188,7 @@ bool is_punct_mbchar(const char *c);
 bool is_word_mbchar(const char *c, bool allow_punct);
 char control_rep(const signed char c);
 char control_mbrep(const char *c);
-char *mbrep(const char *c, char *crep, int *crep_len);
+int length_of_char(const char *c);
 int mbwidth(const char *c);
 int mb_cur_max(void);
 char *make_mbchar(long chr, int *chr_mb_len);
diff --git a/src/winio.c b/src/winio.c
index 3bfac99f..0ad2140c 100644
--- a/src/winio.c
+++ b/src/winio.c
@@ -1780,6 +1780,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
     }
 
     while (*buf != '\0') {
+	int charlength;
+
 	if (*buf == ' ') {
 	    /* Show a space as a visible character, or as a space. */
 #ifndef NANO_TINY
@@ -1792,6 +1794,8 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 #endif
 		converted[index++] = ' ';
 	    start_col++;
+	    buf++;
+	    continue;
 	} else if (*buf == '\t') {
 	    /* Show a tab as a visible character, or as as a space. */
 #ifndef NANO_TINY
@@ -1809,30 +1813,46 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 		converted[index++] = ' ';
 		start_col++;
 	    }
+	    buf++;
+	    continue;
+	}
+
+	charlength = length_of_char(buf);
+
 	/* If buf contains a control character, represent it. */
-	} else if (is_cntrl_mbchar(buf)) {
+	if (is_cntrl_mbchar(buf)) {
 	    converted[index++] = '^';
 	    converted[index++] = control_mbrep(buf);
 	    start_col += 2;
-	/* If buf contains a non-control character, interpret it.  If buf
-	 * contains an invalid multibyte sequence, display it as such. */
-	} else {
-	    char *character = charalloc(mb_cur_max());
-	    int charlen, i;
-	    character = mbrep(buf, character, &charlen);
-
-	    for (i = 0; i < charlen; i++)
-		converted[index++] = character[i];
+	    buf += charlength;
+	    continue;
+	}
 
-	    start_col += mbwidth(character);
+	/* If buf contains a valid non-control character, simply copy it. */
+	if (charlength > 0) {
+	    int width = mbwidth(buf);
 
-	    free(character);
+	    for (; charlength > 0; charlength--)
+		converted[index++] = *(buf++);
 
-	    if (mbwidth(buf) > 1)
+	    start_col += width;
+	    if (width > 1)
 		seen_wide = TRUE;
+
+	    continue;
 	}
 
-	buf += parse_mbchar(buf, NULL, NULL);
+	/* Represent an invalid sequence with the Replacement Character. */
+	converted[index++] = '\xEF';
+	converted[index++] = '\xBF';
+	converted[index++] = '\xBD';
+
+	start_col += 1;
+	buf++;
+
+	/* For an invalid codepoint (length minus 8), skip its remaining bytes. */
+	if (charlength < -1)
+	   buf += charlength + 7;
     }
 
     /* Null-terminate converted. */
-- 
GitLab