From 6f12992cea6f7cb9a6e30f3dfafa50a25a8e17a6 Mon Sep 17 00:00:00 2001
From: Benno Schulenberg <bensberg@justemail.net>
Date: Thu, 30 Jun 2016 18:02:45 +0200
Subject: [PATCH] new feature: add the option --wordchars, to set extra word
 characters

This allows the user to specify which other characters, besides the
default alphanumeric ones, should be considered as part of a word, so
that word operations like Ctrl+Left and Ctrl+Right will pass them by.

Using this option overrides the option --wordbounds.

This fulfills https://savannah.gnu.org/bugs/?47283.
---
 doc/man/nano.1        |  7 ++++++-
 doc/man/nanorc.5      |  7 ++++++-
 doc/nanorc.sample.in  |  7 ++++++-
 doc/texinfo/nano.texi | 15 +++++++++++++--
 src/chars.c           | 21 ++++++++++++++++-----
 src/global.c          |  4 ++++
 src/nano.c            | 16 ++++++++++++++--
 src/proto.h           |  3 +++
 src/rcfile.c          |  4 ++++
 src/utils.c           |  4 ++--
 10 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/doc/man/nano.1 b/doc/man/nano.1
index fb4182ba..4d78208b 100644
--- a/doc/man/nano.1
+++ b/doc/man/nano.1
@@ -148,9 +148,14 @@ keystroke instead of 25.  Note that \fB\-c\fP overrides this.
 Show the current version number and exit.
 .TP
 .BR \-W ", " \-\-wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.
 .TP
+.BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as part of a word.  This overrides option
+\fB\-W\fR (\fB\-\-wordbounds\fR).
+.TP
 .BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname
 Specify the name of the syntax highlighting to use from among the ones
 defined in the \fInanorc\fP files.
diff --git a/doc/man/nanorc.5 b/doc/man/nanorc.5
index acacac3a..7c26b59c 100644
--- a/doc/man/nanorc.5
+++ b/doc/man/nanorc.5
@@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and
 spaces.  They must be single-column characters.
 .TP
 .B set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.
+.TP
+.B set wordchars \fIstring\fP
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides the option
+\fBwordbounds\fR.
 
 .SH SYNTAX HIGHLIGHTING
 Coloring the different syntactic elements of a file
diff --git a/doc/nanorc.sample.in b/doc/nanorc.sample.in
index 47d3bc76..3a1bfe06 100644
--- a/doc/nanorc.sample.in
+++ b/doc/nanorc.sample.in
@@ -178,10 +178,15 @@
 ## The default otherwise:
 # set whitespace ">."
 
-## Detect word boundaries more accurately by treating punctuation
+## Detect word boundaries differently by treating punctuation
 ## characters as parts of words.
 # set wordbounds
 
+## The characters (besides alphanumeric ones) that should be considered
+## as parts of words.  This option does not have a default value.  When
+## set, it overrides option 'set wordbounds'.
+# set wordchars "<_>."
+
 
 ## Paint the interface elements of nano.
 ## This is an example; by default there are no colors.
diff --git a/doc/texinfo/nano.texi b/doc/texinfo/nano.texi
index 4b40cb0b..509d749a 100644
--- a/doc/texinfo/nano.texi
+++ b/doc/texinfo/nano.texi
@@ -231,9 +231,15 @@ Show the current version number and exit.
 
 @item -W
 @itemx --wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as parts of words.
 
+@item -X "@var{characters}"
+@itemx --wordchars="@var{characters}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides option
+@option{-W} (@option{--wordbounds}).
+
 @item -Y @var{name}
 @itemx --syntax=@var{name}
 Specify a specific syntax from the nanorc files to use for highlighting.
@@ -831,9 +837,14 @@ spaces.  They must be single-column characters.  The default pair
 for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}.
 
 @item set wordbounds
-Detect word boundaries more accurately by treating punctuation
+Detect word boundaries differently by treating punctuation
 characters as part of a word.
 
+@item set wordchars "@var{string}"
+Specify which other characters (besides the normal alphanumeric ones)
+should be considered as parts of words.  This overrides the option
+@code{wordbounds}.
+
 @end table
 
 @node Syntax Highlighting
diff --git a/src/chars.c b/src/chars.c
index e2852cee..4a6def3b 100644
--- a/src/chars.c
+++ b/src/chars.c
@@ -183,15 +183,39 @@ bool is_punct_mbchar(const char *c)
 	return ispunct((unsigned char)*c);
 }
 
-/* Return TRUE for a multibyte character found in a word (currently only
- * an alphanumeric or punctuation character, and only the latter if
- * allow_punct is TRUE) and FALSE otherwise. */
+/* Return TRUE when the given multibyte character c is a word-forming
+ * character, and FALSE otherwise.  A character is word-forming when it
+ * is alphanumeric, or, when word_chars is a non-empty string, when it
+ * occurs in word_chars.  Only when word_chars is unset or empty does
+ * allow_punct matter: punctuation then counts when allow_punct is TRUE.
+ * The terminating null byte is never word-forming. */
 bool is_word_mbchar(const char *c, bool allow_punct)
 {
     assert(c != NULL);
 
+    /* At the end of the string the symbol below would be empty, and
+     * strstr() finds an empty needle in any haystack, so bail out. */
+    if (*c == '\0')
+	return FALSE;
+
+    if (is_alnum_mbchar(c))
+	return TRUE;
+
+    if (word_chars != NULL && *word_chars != '\0') {
+	bool wordforming;
+	char *symbol = charalloc(MB_CUR_MAX + 1);
+	int symlen = parse_mbchar(c, symbol, NULL);
+
+	symbol[symlen] = '\0';
+	wordforming = (strstr(word_chars, symbol) != NULL);
+
+	/* The copy was only needed for the lookup; don't leak it. */
+	free(symbol);
+
+	return wordforming;
+    }
+
+    return (allow_punct && is_punct_mbchar(c));
 }
 
 /* Return the visible representation of control character c. */
diff --git a/src/global.c b/src/global.c
index c9135a7f..ca7d4e1e 100644
--- a/src/global.c
+++ b/src/global.c
@@ -124,6 +124,9 @@ size_t quotelen;
 #endif
 #endif
 
+char *word_chars = NULL;
+	/* Nonalphanumeric characters that also form words. */
+
 bool nodelay_mode = FALSE;
 	/* Are we checking for a cancel wile doing something? */
 
@@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void)
     delwin(edit);
     delwin(bottomwin);
 
+    free(word_chars);
 #ifndef DISABLE_JUSTIFY
     free(quotestr);
 #ifdef HAVE_REGEX_H
diff --git a/src/nano.c b/src/nano.c
index 0d5cc471..93e746dc 100644
--- a/src/nano.c
+++ b/src/nano.c
@@ -860,6 +860,8 @@ void usage(void)
 #ifndef NANO_TINY
     print_opt("-W", "--wordbounds",
 	N_("Detect word boundaries more accurately"));
+    print_opt("-X", "--wordchars",
+	N_("Which other characters are word parts"));
 #endif
 #ifndef DISABLE_COLOR
     if (!ISSET(RESTRICTED))
@@ -1995,6 +1997,7 @@ int main(int argc, char **argv)
 	{"smooth", 0, NULL, 'S'},
 	{"quickblank", 0, NULL, 'U'},
 	{"wordbounds", 0, NULL, 'W'},
+	{"wordchars", 1, NULL, 'X'},
 	{"autoindent", 0, NULL, 'i'},
 	{"cut", 0, NULL, 'k'},
 	{"unix", 0, NULL, 'u'},
@@ -2040,11 +2043,11 @@ int main(int argc, char **argv)
     while ((optchr =
 #ifdef HAVE_GETOPT_LONG
 	getopt_long(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$",
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$",
 		long_options, NULL)
 #else
 	getopt(argc, argv,
-		"ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$")
+		"ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$")
 #endif
 		) != -1) {
 	switch (optchr) {
@@ -2146,6 +2149,9 @@ int main(int argc, char **argv)
 	    case 'W':
 		SET(WORD_BOUNDS);
 		break;
+	    case 'X':
+		word_chars = mallocstrcpy(word_chars, optarg);
+		break;
 #endif
 #ifndef DISABLE_COLOR
 	    case 'Y':
@@ -2279,6 +2285,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	char *backup_dir_cpy = backup_dir;
+	char *word_chars_cpy = word_chars;
 #endif
 #ifndef DISABLE_JUSTIFY
 	char *quotestr_cpy = quotestr;
@@ -2297,6 +2304,7 @@ int main(int argc, char **argv)
 #endif
 #ifndef NANO_TINY
 	backup_dir = NULL;
+	word_chars = NULL;
 #endif
 #ifndef DISABLE_JUSTIFY
 	quotestr = NULL;
@@ -2327,6 +2335,10 @@ int main(int argc, char **argv)
 	    free(backup_dir);
 	    backup_dir = backup_dir_cpy;
 	}
+	if (word_chars_cpy != NULL) {
+	    free(word_chars);
+	    word_chars = word_chars_cpy;
+	}
 #endif
 #ifndef DISABLE_JUSTIFY
 	if (quotestr_cpy != NULL) {
diff --git a/src/proto.h b/src/proto.h
index 497d985a..23c1204a 100644
--- a/src/proto.h
+++ b/src/proto.h
@@ -91,7 +91,10 @@ extern size_t quotelen;
 #endif
 #endif /* !DISABLE_JUSTIFY */
 
+extern char *word_chars;
+
 extern bool nodelay_mode;
+
 extern char *answer;
 
 extern ssize_t tabsize;
diff --git a/src/rcfile.c b/src/rcfile.c
index 0e57c68a..c9f6b538 100644
--- a/src/rcfile.c
+++ b/src/rcfile.c
@@ -102,6 +102,7 @@ static const rcoption rcopts[] = {
     {"unix", MAKE_IT_UNIX},
     {"whitespace", 0},
     {"wordbounds", WORD_BOUNDS},
+    {"wordchars", 0},
 #endif
 #ifndef DISABLE_COLOR
     {"titlecolor", 0},
@@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream
 	if (strcasecmp(rcopts[i].name, "backupdir") == 0)
 	    backup_dir = option;
 	else
+	if (strcasecmp(rcopts[i].name, "wordchars") == 0)
+	    word_chars = option;
+	else
 #endif
 #ifndef DISABLE_SPELLER
 	if (strcasecmp(rcopts[i].name, "speller") == 0)
diff --git a/src/utils.c b/src/utils.c
index 4b661501..470f15bb 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf)
      * word isn't a non-punctuation "word" character, and if we're at
      * the end of the line or the character after the word isn't a
      * non-punctuation "word" character, we have a whole word. */
-    retval = (position == 0 || !is_word_mbchar(before, FALSE)) &&
-		(word_end == strlen(buf) || !is_word_mbchar(after, FALSE));
+    retval = (position == 0 || !is_alnum_mbchar(before)) &&
+		(word_end == strlen(buf) || !is_alnum_mbchar(after));
 
     free(before);
     free(after);
-- 
GitLab