From d0c4d378b40102e1f43e049dbbab60295a50147b Mon Sep 17 00:00:00 2001
From: David Lawrence Ramsey <pooka109@gmail.com>
Date: Tue, 21 Jun 2005 03:26:58 +0000
Subject: [PATCH] add a multibyte case-insensitive version of strcoll(), and
 make diralphasort() use it so that filenames are sorted properly according to
 the current locale

git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2743 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
---
 ChangeLog    |  19 +++++----
 configure.ac |   2 +-
 src/chars.c  | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/files.c  |   5 ++-
 src/nano.h   |   6 +++
 src/proto.h  |  10 +++++
 6 files changed, 145 insertions(+), 10 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index dc8c24bf..66407c96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -60,10 +60,13 @@ CVS code -
 	  wrappers to take wint_t instead of wchar_t to match the
 	  functions they wrap; rename some functions for consistency;
 	  add functions to detect blank characters in a string, for use
-	  in rcfile option parsing; and don't count matches between
-	  valid and invalid multibyte sequences anymore, as it causes
-	  problems when doing a replace.  New functions
-	  is_valid_mbstring(), has_blank_chars(), and
+	  in rcfile option parsing; add functions to case-insensitively
+	  compare strings while taking the current locale into account,
+	  for use in sorting filename lists; and don't count matches
+	  between valid and invalid multibyte sequences anymore, as it
+	  causes problems when doing a replace.  New functions
+	  nstrcasecoll(), nstrncasecoll(), mbstrcasecoll(),
+	  mbstrncasecoll(), is_valid_mbstring(), has_blank_chars(), and
 	  has_blank_mbchars(); changes to is_alnum_mbchar(),
 	  is_blank_char() (renamed nisblank()), is_blank_mbchar(),
 	  is_blank_wchar() (renamed niswblank()), is_cntrl_wchar(),
@@ -154,8 +157,9 @@ CVS code -
 	  long instead of an unsigned int. (DLR)
 	- Declare the size_t i only in the loop where it's used. (DLR)
   diralphasort()
-	- Use mbstrcasecmp() instead of strcasecmp(), so that UTF-8
-	  filenames are sorted properly. (DLR)
+	- Use mbstrcasecoll() instead of strcasecmp(), so that UTF-8
+	  filenames are sorted properly according to the current
+	  locale. (DLR)
   cwd_tab_completion(), browser_init()
 	- Rename variable next to nextdir to avoid confusion. (DLR)
   input_tab()
@@ -354,7 +358,8 @@ CVS code -
 	  Weinehall)
 	- Don't refer to the built-in file browser as crappy anymore.
 	  (DLR)
-	- Check for iswpunct() and mbstowcs(). (DLR)
+	- Check for iswpunct(), mbstowcs(), strcasecoll(), and
+	  strncasecoll(). (DLR)
 	- Change the behavior of --enable-extra to only define
 	  NANO_EXTRA, instead of defining both it and
 	  ENABLE_MULTIBUFFER. (DLR)
diff --git a/configure.ac b/configure.ac
index 9c99de43..bf1890e7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -395,7 +395,7 @@ int main(void)
 
 dnl Checks for functions.
 
-AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasestr strncasecmp strnlen vsnprintf)
+AC_CHECK_FUNCS(getdelim getline isblank snprintf strcasecmp strcasecoll strcasestr strncasecmp strncasecoll strnlen vsnprintf)
 
 if test x$enable_utf8 != xno; then
     AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbstowcs mbtowc wctomb wcwidth)
diff --git a/src/chars.c b/src/chars.c
index f0fc67d0..92c2e5dd 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -512,6 +512,119 @@ int mbstrncasecmp(const char *s1, const char *s2, size_t n)
 	return strncasecmp(s1, s2, n);
 }
 
+#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
+#ifndef HAVE_STRCASECOLL
+/* Case-insensitive strcoll(): strncasecoll() with n = (size_t)-1. */
+int nstrcasecoll(const char *s1, const char *s2)
+{
+    return strncasecoll(s1, s2, (size_t)-1);
+}
+#endif
+
+#ifndef HAVE_STRNCASECOLL
+/* This function is equivalent to a case-insensitive strcoll() for the
+ * first n characters of s1 and s2.  Characters are compared one at a
+ * time; a string that is a prefix of the other sorts first. */
+int nstrncasecoll(const char *s1, const char *s2, size_t n)
+{
+    char t1[2] = {'\0', '\0'}, t2[2] = {'\0', '\0'};
+
+    assert(s1 != NULL && s2 != NULL);
+
+    for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) {
+	int retval;
+
+	/* tolower() needs a value representable as unsigned char. */
+	t1[0] = tolower((unsigned char)*s1);
+	t2[0] = tolower((unsigned char)*s2);
+	if ((retval = strcoll(t1, t2)) != 0)
+	    return retval;
+    }
+
+    /* Unless n ran out, the string that ended first sorts first. */
+    return (n == 0) ? 0 : (*s2 == '\0') - (*s1 == '\0');
+}
+#endif
+
+/* This function is equivalent to a case-insensitive strcoll() for
+ * multibyte strings; it is mbstrncasecoll() with n = (size_t)-1. */
+int mbstrcasecoll(const char *s1, const char *s2)
+{
+    return mbstrncasecoll(s1, s2, (size_t)-1);
+}
+
+#ifdef NANO_WIDE
+/* Put the lowercased, null-terminated multibyte character at the start
+ * of s into chr, set *bad to whether it could not be converted, and
+ * return the length that parse_mbchar() reported for it in s. */
+static int lower_mbchar(const char *s, char *chr, bool *bad)
+{
+    wchar_t wc;
+    int chr_len = parse_mbchar(s, chr, NULL, NULL), lower_len;
+
+    *bad = (mbtowc(&wc, chr, chr_len) <= 0);
+    if (*bad) {
+	mbtowc(NULL, NULL, 0);
+	chr[0] = *s;
+	chr[1] = '\0';
+    } else {
+	lower_len = wctomb(chr, towlower(wc));
+	if (lower_len <= 0) {
+	    wctomb(NULL, 0);
+	    lower_len = 0;
+	}
+	chr[lower_len] = '\0';
+    }
+
+    /* Lowercasing can change the byte count, so return chr_len. */
+    return chr_len;
+}
+#endif
+
+/* This function is equivalent to a case-insensitive strcoll() for the
+ * first n characters of multibyte strings.  In UTF-8 mode, a string
+ * that is a prefix of the other sorts first. */
+int mbstrncasecoll(const char *s1, const char *s2, size_t n)
+{
+#ifdef NANO_WIDE
+    if (ISSET(USE_UTF8)) {
+	int retval = 0;
+	char *s1_mb = charalloc(MB_CUR_MAX + 1);
+	char *s2_mb = charalloc(MB_CUR_MAX + 1);
+
+	assert(s1 != NULL && s2 != NULL);
+
+	for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--) {
+	    bool bad_s1_mb, bad_s2_mb;
+	    int s1_len = lower_mbchar(s1, s1_mb, &bad_s1_mb);
+	    int s2_len = lower_mbchar(s2, s2_mb, &bad_s2_mb);
+
+	    /* A valid character never matches an invalid one: on a
+	     * tie, the valid one sorts first. */
+	    if ((retval = strcoll(s1_mb, s2_mb)) == 0)
+		retval = (int)bad_s1_mb - (int)bad_s2_mb;
+	    if (retval != 0)
+		break;
+
+	    s1 += s1_len;
+	    s2 += s2_len;
+	}
+
+	/* If all compared characters matched before n ran out, the
+	 * string that ended first (if either did) sorts first. */
+	if (retval == 0 && n > 0)
+	    retval = (*s2 == '\0') - (*s1 == '\0');
+
+	free(s1_mb);
+	free(s2_mb);
+
+	return retval;
+    } else
+#endif
+	return strncasecoll(s1, s2, n);
+}
+#endif /* !DISABLE_TABCOMP || !DISABLE_BROWSER */
+
 #ifndef HAVE_STRCASESTR
 /* This function is equivalent to strcasestr().  It was adapted from
  * mutt's mutt_stristr() function. */
diff --git a/src/files.c b/src/files.c
index fddedc7d..7d32f071 100644
--- a/src/files.c
+++ b/src/files.c
@@ -1994,7 +1994,8 @@ char *real_dir_from_tilde(const char *buf)
 
 #if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
 /* Our sort routine for file listings.  Sort alphabetically and
- * case-insensitively, and sort directories before filenames. */
+ * case-insensitively (taking the locale into account), and sort
+ * directories before filenames. */
 int diralphasort(const void *va, const void *vb)
 {
     struct stat fileinfo;
@@ -2008,7 +2009,7 @@ int diralphasort(const void *va, const void *vb)
     if (!aisdir && bisdir)
 	return 1;
 
-    return mbstrcasecmp(a, b);
+    return mbstrcasecoll(a, b);
 }
 
 /* Free the memory allocated for array, which should contain len
diff --git a/src/nano.h b/src/nano.h
index df92597c..7ba24bf2 100644
--- a/src/nano.h
+++ b/src/nano.h
@@ -120,6 +120,12 @@
 #ifndef HAVE_STRNCASECMP
 #define strncasecmp nstrncasecmp
 #endif
+#ifndef HAVE_STRCASECOLL
+#define strcasecoll nstrcasecoll
+#endif
+#ifndef HAVE_STRNCASECOLL
+#define strncasecoll nstrncasecoll
+#endif
 #ifndef HAVE_STRCASESTR
 #define strcasestr nstrcasestr
 #endif
diff --git a/src/proto.h b/src/proto.h
index 6465bf59..4ceccb76 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -195,6 +195,16 @@ int mbstrcasecmp(const char *s1, const char *s2);
 int nstrncasecmp(const char *s1, const char *s2, size_t n);
 #endif
 int mbstrncasecmp(const char *s1, const char *s2, size_t n);
+#if !defined(DISABLE_TABCOMP) || !defined(DISABLE_BROWSER)
+#ifndef HAVE_STRCASECOLL
+int nstrcasecoll(const char *s1, const char *s2);
+#endif
+#ifndef HAVE_STRNCASECOLL
+int nstrncasecoll(const char *s1, const char *s2, size_t n);
+#endif
+int mbstrcasecoll(const char *s1, const char *s2);
+int mbstrncasecoll(const char *s1, const char *s2, size_t n);
+#endif
 #ifndef HAVE_STRCASESTR
 const char *nstrcasestr(const char *haystack, const char *needle);
 #endif
-- 
GitLab