From a0aa4df04147477e7f2eeeb841b744826f224490 Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Mon, 18 Jul 2005 05:17:59 +0000
Subject: [PATCH] properly handle cases where a search string and a match to it
 are not the same length in bytes, i.e., when the latter contains invalid
 multibyte characters interpreted as normal characters

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2888 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog    |  5 +++++
 src/chars.c  | 20 +++-----------------
 src/search.c | 28 ++++++++++++++++++++++------
 3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 09775a4f..35f07f73 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -70,6 +70,11 @@ CVS code -
 	  do_cursorpos(). (DLR)
 	- Change the NANO_WIDE #define to ENABLE_UTF8, as the latter is
 	  clearer. (DLR)
+	- Properly handle cases where a search string and a match to it
+	  are not the same length in bytes, i.e., when the latter
+	  contains invalid multibyte characters interpreted as normal
+	  characters.  Changes to mbstrncasecmp(), mbstrcasestr(),
+	  mbrevstrcasestr(), findnextstr(), and do_replace_loop(). (DLR)
 - files.c:
   open_file()
 	- Assert that filename isn't NULL, and don't do anything special
diff --git a/src/chars.c b/src/chars.c
index 0d3a9ddf..2acbe146 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -475,7 +475,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 	assert(s1 != NULL && s2 != NULL);
 
 	while (n > 0 && *s1 != '\0' && *s2 != '\0') {
-	    bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
 	    int s1_mb_len, s2_mb_len;
 
 	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);
@@ -483,7 +482,6 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 	    if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
 		mbtowc(NULL, NULL, 0);
 		ws1 = (unsigned char)*s1_mb;
-		bad_s1_mb = TRUE;
 	    }
 
 	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);
@@ -491,11 +489,9 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 	    if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
 		mbtowc(NULL, NULL, 0);
 		ws2 = (unsigned char)*s2_mb;
-		bad_s2_mb = TRUE;
 	    }
 
-	    if (n == 0 || bad_s1_mb != bad_s2_mb ||
-		towlower(ws1) != towlower(ws2))
+	    if (n == 0 || towlower(ws1) != towlower(ws2))
 		break;
 
 	    s1 += s1_mb_len;
@@ -550,14 +546,11 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
 	    int r_mb_len, q_mb_len;
 
 	    while (*q != '\0') {
-		bool bad_r_mb = FALSE, bad_q_mb = FALSE;
-
 		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
 
 		if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
 		    mbtowc(NULL, NULL, 0);
 		    wr = (unsigned char)*r;
-		    bad_r_mb = TRUE;
 		}
 
 		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
@@ -565,11 +558,9 @@ const char *mbstrcasestr(const char *haystack, const char *needle)
 		if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
 		    mbtowc(NULL, NULL, 0);
 		    wq = (unsigned char)*q;
-		    bad_q_mb = TRUE;
 		}
 
-		if (bad_r_mb != bad_q_mb ||
-			towlower(wr) != towlower(wq))
+		if (towlower(wr) != towlower(wq))
 		    break;
 
 		r += r_mb_len;
@@ -656,14 +647,11 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
 	    int r_mb_len, q_mb_len;
 
 	    while (*q != '\0') {
-		bool bad_r_mb = FALSE, bad_q_mb = FALSE;
-
 		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);
 
 		if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
 		    mbtowc(NULL, NULL, 0);
 		    wr = (unsigned char)*r;
-		    bad_r_mb = TRUE;
 		}
 
 		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);
@@ -671,11 +659,9 @@ const char *mbrevstrcasestr(const char *haystack, const char *needle,
 		if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
 		    mbtowc(NULL, NULL, 0);
 		    wq = (unsigned char)*q;
-		    bad_q_mb = TRUE;
 		}
 
-		if (bad_r_mb != bad_q_mb ||
-			towlower(wr) != towlower(wq))
+		if (towlower(wr) != towlower(wq))
 		    break;
 
 		r += r_mb_len;
diff --git a/src/search.c b/src/search.c
index 0fc9e8cf..f9c94720 100644
--- a/src/search.c
+++ b/src/search.c
@@ -321,12 +321,21 @@ bool findnextstr(bool can_display_wrap, bool wholeword, bool
 		/* Is this potential match a whole word? */
 
 	    /* Set found_len to the length of the potential match. */
-	    found_len =
 #ifdef HAVE_REGEX_H
-		ISSET(USE_REGEXP) ?
-		regmatches[0].rm_eo - regmatches[0].rm_so :
+	    if (ISSET(USE_REGEXP))
+		found_len = regmatches[0].rm_eo - regmatches[0].rm_so;
+	    else
 #endif
-		strlen(needle);
+	    {
+		size_t needle_len = mbstrlen(needle);
+
+		/* Get found's length in single-byte characters. */
+		found_len = 0;
+
+		for (; needle_len > 0; needle_len--)
+		    found_len += parse_mbchar(found + found_len, NULL,
+			NULL, NULL);
+	    }
 
 	    /* If we're searching for whole words, see if this potential
 	     * match is a whole word. */
@@ -784,13 +793,20 @@ ssize_t do_replace_loop(const char *needle, const filestruct
 #endif
 
 	if (i > 0 || replaceall) {	/* Yes, replace it!!!! */
-	    char *copy;
+	    char *match, *copy;
 	    size_t length_change;
 
 	    if (i == 2)
 		replaceall = TRUE;
 
-	    copy = replace_line(needle);
+	    /* Get the match's length in single-byte characters. */
+	    match = mallocstrncpy(NULL, openfile->current->data +
+		openfile->current_x, match_len + 1);
+	    match[match_len] = '\0';
+
+	    copy = replace_line(match);
+
+	    free(match);
 
 	    length_change = strlen(copy) -
 		strlen(openfile->current->data);
-- 
GitLab