From f7bcbeb87e76faada041ba5d36db82f61e8335ec Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Mon, 18 Jul 2005 03:23:37 +0000
Subject: [PATCH] don't display invalid multibyte sequences as Unicode 0xFFFD
 in display_string() anymore, as it's inconsistent with how we handle them
 elsewhere

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2886 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog   |  3 ---
 src/winio.c | 52 +++++++++++++++++++++-------------------------------
 2 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7e8bea97..09775a4f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -163,9 +163,6 @@ CVS code -
   do_statusbar_output()
 	- When adding a character, just add its length in bytes to
 	  statusbar_x instead of calling do_statusbar_right(). (DLR)
-  display_string()
-	- Display invalid multibyte sequences as Unicode 0xFFFD
-	  (Replacement Character). (DLR)
   titlebar()
 	- Rework to display only one space after the version number, so
 	  that there's more room for other things, and to not display
diff --git a/src/winio.c b/src/winio.c
index 0ec0a3ff..da0d89d3 100644
--- a/src/winio.c
+++ b/src/winio.c
@@ -2285,13 +2285,6 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 	/* Current position in converted. */
     bool bad_char;
 	/* Whether we have an invalid multibyte character. */
-#ifdef ENABLE_UTF8
-    const char *bad_buf_mb = "\xEF\xBF\xBD";
-	/* What to display when we have an invalid multibyte
-	 * character: Unicode 0xFFFD (Replacement Character). */
-    const int bad_buf_mb_len = 3;
-	/* The length of bad_buf_mb. */
-#endif
 
     char *buf_mb = charalloc(mb_cur_max());
     int buf_mb_len;
@@ -2371,37 +2364,25 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 		converted[index++] = ' ';
 		start_col++;
 	    }
-	/* If buf contains a control character, interpret it. */
+	/* If buf contains a control character, interpret it.  If buf
+	 * contains an invalid multibyte control character, interpret
+	 * it as though it's a normal control character. */
 	} else if (is_cntrl_mbchar(buf_mb)) {
-	    int i;
+	    char *ctrl_buf_mb = charalloc(mb_cur_max());
+	    int ctrl_buf_mb_len, i;
 
 	    converted[index++] = '^';
 	    start_col++;
 
-#ifdef ENABLE_UTF8
-	    /* If buf contains an invalid multibyte control character,
-	     * display it as such. */
-	    if (ISSET(USE_UTF8) && bad_char) {
-		for (i = 0; i < bad_buf_mb_len; i++)
-		    converted[index++] = bad_buf_mb[i];
+	    ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb,
+		&ctrl_buf_mb_len);
 
-		start_col += mbwidth(bad_buf_mb);
-	    } else
-#endif
-	    {
-		char *ctrl_buf_mb = charalloc(mb_cur_max());
-		int ctrl_buf_mb_len;
+	    for (i = 0; i < ctrl_buf_mb_len; i++)
+		converted[index++] = ctrl_buf_mb[i];
 
-		ctrl_buf_mb = control_mbrep(buf_mb, ctrl_buf_mb,
-			&ctrl_buf_mb_len);
-
-		for (i = 0; i < ctrl_buf_mb_len; i++)
-		    converted[index++] = ctrl_buf_mb[i];
-
-		start_col += mbwidth(ctrl_buf_mb);
+	    start_col += mbwidth(ctrl_buf_mb);
 
-		free(ctrl_buf_mb);
-	    }
+	    free(ctrl_buf_mb);
 	/* If buf contains a space character, interpret it. */
 	} else if (*buf_mb == ' ') {
 #if !defined(NANO_SMALL) && defined(ENABLE_NANORC)
@@ -2421,12 +2402,21 @@ char *display_string(const char *buf, size_t start_col, size_t len, bool
 
 #ifdef ENABLE_UTF8
 	    /* If buf contains an invalid multibyte non-control
-	     * character, display it as such. */
+	     * character, interpret it as though it's a normal
+	     * non-control character. */
 	    if (ISSET(USE_UTF8) && bad_char) {
+		char *bad_buf_mb;
+		int bad_buf_mb_len;
+
+		bad_buf_mb = make_mbchar((unsigned char)*buf_mb,
+			&bad_buf_mb_len);
+
 		for (i = 0; i < bad_buf_mb_len; i++)
 		    converted[index++] = bad_buf_mb[i];
 
 		start_col += mbwidth(bad_buf_mb);
+
+		free(bad_buf_mb);
 	    } else {
 #endif
 		for (i = 0; i < buf_mb_len; i++)
-- 
GitLab