From 9838a5182eb2eb03dd71d9f99afa1704cdd43d6a Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Wed, 6 Jul 2005 19:12:41 +0000
Subject: [PATCH] in display_string(), display invalid multibyte sequences as
 Unicode 0xFFFD (Replacement Character)

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2819 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog   |  4 ++++
 src/winio.c | 58 +++++++++++++++++++++++++++++++++--------------------
 2 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 8003babc..31dec6ad 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -33,6 +33,10 @@ CVS code -
   do_replace()
 	- Blank out last_replace properly again just before displaying
 	  the "Replace" prompt. (DLR, found by Mike Frysinger)
+- winio.c:
+  display_string()
+	- Display invalid multibyte sequences as Unicode 0xFFFD
+	  (Replacement Character). (DLR)
 - configure.ac:
 	- Since we only use vsnprintf() now, remove the tests for
 	  snprintf(). (DLR)
diff --git a/src/winio.c b/src/winio.c
index 5e1d1904..5e9e932a 100644
--- a/src/winio.c
+++ b/src/winio.c
@@ -2265,10 +2265,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 	/* The string we return. */
     size_t index;
 	/* Current position in converted. */
+    bool bad_char;
+	/* Whether we have an invalid multibyte character. */
+#ifdef NANO_WIDE
+    const char *bad_buf_mb = "\xEF\xBF\xBD";
+	/* What to display when we have an invalid multibyte
+	 * character: Unicode 0xFFFD (Replacement Character). */
+    const int bad_buf_mb_len = 3;
+	/* The length of bad_buf_mb. */
+#endif
 
     char *buf_mb = charalloc(mb_cur_max());
     int buf_mb_len;
-    bool bad_char;
 
     /* If dollars is TRUE, make room for the "$" at the end of the
      * line. */
@@ -2329,6 +2337,7 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 	buf_mb_len = parse_mbchar(buf + start_index, buf_mb, &bad_char,
 		NULL);
 
+	/* If buf contains a tab character, interpret it. */
 	if (*buf_mb == '\t') {
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 	    if (ISSET(WHITESPACE_DISPLAY)) {
@@ -2344,25 +2353,38 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 		converted[index++] = ' ';
 		start_col++;
 	    }
-	/* If buf contains a control character, interpret it.  If it
-	 * contains an invalid multibyte control character, interpret
-	 * that character as though it's a normal control character. */
+	/* If buf contains a control character, interpret it. */
 	} else if (is_cntrl_mbchar(buf_mb)) {
-	    char *ctrl_buf_mb = charalloc(mb_cur_max());
-	    int ctrl_buf_mb_len, i;
+	    int i;
 
 	    converted[index++] = '^';
 	    start_col++;
 
-	    ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb,
-		&ctrl_buf_mb_len);
+#ifdef NANO_WIDE
+	    /* If buf contains an invalid multibyte control character,
+	     * display it as such. */
+	    if (ISSET(USE_UTF8) && bad_char) {
+		for (i = 0; i < bad_buf_mb_len; i++)
+		    converted[index++] = bad_buf_mb[i];
 
-	    for (i = 0; i < ctrl_buf_mb_len; i++)
-		converted[index++] = ctrl_buf_mb[i];
+		start_col += mbwidth(bad_buf_mb);
+	    } else
+#endif
+	    {
+		char *ctrl_buf_mb = charalloc(mb_cur_max());
+		int ctrl_buf_mb_len;
 
-	    start_col += mbwidth(ctrl_buf_mb);
+		ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb,
+			&ctrl_buf_mb_len);
+
+		for (i = 0; i < ctrl_buf_mb_len; i++)
+		    converted[index++] = ctrl_buf_mb[i];
+
+		start_col += mbwidth(ctrl_buf_mb);
 
-	    free(ctrl_buf_mb);
+		free(ctrl_buf_mb);
+	    }
+	/* If buf contains a space character, interpret it. */
 	} else if (*buf_mb == ' ') {
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
 	    if (ISSET(WHITESPACE_DISPLAY)) {
@@ -2375,26 +2397,18 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 #endif
 		converted[index++] = ' '; 
 	    start_col++;
+	/* If buf contains a non-control character, interpret it. */
 	} else {
 	    int i;
 
 #ifdef NANO_WIDE
 	    /* If buf contains an invalid multibyte non-control
-	     * character, interpret that character as though it's a
-	     * normal non-control character. */
+	     * character, display it as such. */
 	    if (ISSET(USE_UTF8) && bad_char) {
-		char *bad_buf_mb;
-		int bad_buf_mb_len;
-
-		bad_buf_mb = make_mbchar((unsigned char)*buf_mb,
-			&bad_buf_mb_len);
-
 		for (i = 0; i < bad_buf_mb_len; i++)
 		    converted[index++] = bad_buf_mb[i];
 
 		start_col += mbwidth(bad_buf_mb);
-
-		free(bad_buf_mb);
 	    } else {
 #endif
 		for (i = 0; i < buf_mb_len; i++)
-- 
GitLab