From 6f12992cea6f7cb9a6e30f3dfafa50a25a8e17a6 Mon Sep 17 00:00:00 2001 From: Benno Schulenberg <bensberg@justemail.net> Date: Thu, 30 Jun 2016 18:02:45 +0200 Subject: [PATCH] new feature: add the option --wordchars, to set extra word characters This allows the user to specify which other characters, besides the default alphanumeric ones, should be considered as part of a word, so that word operations like Ctrl+Left and Ctrl+Right will pass them by. Using this option overrides the option --wordbounds. This fulfills https://savannah.gnu.org/bugs/?47283. --- doc/man/nano.1 | 7 ++++++- doc/man/nanorc.5 | 7 ++++++- doc/nanorc.sample.in | 7 ++++++- doc/texinfo/nano.texi | 15 +++++++++++++-- src/chars.c | 21 ++++++++++++++++----- src/global.c | 4 ++++ src/nano.c | 16 ++++++++++++++-- src/proto.h | 3 +++ src/rcfile.c | 4 ++++ src/utils.c | 4 ++-- 10 files changed, 74 insertions(+), 14 deletions(-) diff --git a/doc/man/nano.1 b/doc/man/nano.1 index fb4182ba..4d78208b 100644 --- a/doc/man/nano.1 +++ b/doc/man/nano.1 @@ -148,9 +148,14 @@ keystroke instead of 25. Note that \fB\-c\fP overrides this. Show the current version number and exit. .TP .BR \-W ", " \-\-wordbounds -Detect word boundaries more accurately by treating punctuation +Detect word boundaries differently by treating punctuation characters as part of a word. .TP +.BR "\-X ""\fIcharacters\fB""" ", " "\-\-wordchars=""" \fIcharacters """ +Specify which other characters (besides the normal alphanumeric ones) +should be considered as part of a word. This overrides option +\fB\-W\fR (\fB\-\-wordbounds\fR). +.TP .BR \-Y\ \fIname\fR ", " \-\-syntax= \fIname Specify the name of the syntax highlighting to use from among the ones defined in the \fInanorc\fP files. diff --git a/doc/man/nanorc.5 b/doc/man/nanorc.5 index acacac3a..7c26b59c 100644 --- a/doc/man/nanorc.5 +++ b/doc/man/nanorc.5 @@ -253,8 +253,13 @@ Set the two characters used to indicate the presence of tabs and spaces. They must be single-column characters. 
.TP .B set wordbounds -Detect word boundaries more accurately by treating punctuation +Detect word boundaries differently by treating punctuation characters as parts of words. +.TP +.B set wordchars \fIstring\fP +Specify which other characters (besides the normal alphanumeric ones) +should be considered as parts of words. This overrides the option +\fBwordbounds\fR. .SH SYNTAX HIGHLIGHTING Coloring the different syntactic elements of a file diff --git a/doc/nanorc.sample.in b/doc/nanorc.sample.in index 47d3bc76..3a1bfe06 100644 --- a/doc/nanorc.sample.in +++ b/doc/nanorc.sample.in @@ -178,10 +178,15 @@ ## The default otherwise: # set whitespace ">." -## Detect word boundaries more accurately by treating punctuation +## Detect word boundaries differently by treating punctuation ## characters as parts of words. # set wordbounds +## The characters (besides alphanumeric ones) that should be considered +## as parts of words. This option does not have a default value. When +## set, it overrides option 'set wordbounds'. +# set wordchars "<_>." + ## Paint the interface elements of nano. ## This is an example; by default there are no colors. diff --git a/doc/texinfo/nano.texi b/doc/texinfo/nano.texi index 4b40cb0b..509d749a 100644 --- a/doc/texinfo/nano.texi +++ b/doc/texinfo/nano.texi @@ -231,9 +231,15 @@ Show the current version number and exit. @item -W @itemx --wordbounds -Detect word boundaries more accurately by treating punctuation +Detect word boundaries differently by treating punctuation characters as parts of words. +@item -X "@var{characters}" +@itemx --wordchars="@var{characters}" +Specify which other characters (besides the normal alphanumeric ones) +should be considered as parts of words. This overrides option +@option{-W} (@option{--wordbounds}). + @item -Y @var{name} @itemx --syntax=@var{name} Specify a specific syntax from the nanorc files to use for highlighting. @@ -831,9 +837,14 @@ spaces. They must be single-column characters. 
The default pair for a UTF-8 locale is @t{"»·"}, and for other locales @t{">."}. @item set wordbounds -Detect word boundaries more accurately by treating punctuation +Detect word boundaries differently by treating punctuation characters as part of a word. +@item set wordchars "@var{string}" +Specify which other characters (besides the normal alphanumeric ones) +should be considered as parts of words. This overrides the option +@code{wordbounds}. + @end table @node Syntax Highlighting diff --git a/src/chars.c b/src/chars.c index e2852cee..4a6def3b 100644 --- a/src/chars.c +++ b/src/chars.c @@ -183,15 +183,34 @@ bool is_punct_mbchar(const char *c) return ispunct((unsigned char)*c); } -/* Return TRUE for a multibyte character found in a word (currently only - * an alphanumeric or punctuation character, and only the latter if - * allow_punct is TRUE) and FALSE otherwise. */ +/* Return TRUE when the given multibyte character c is a word-forming + * character (that is: alphanumeric, or specified in wordchars, or + * punctuation when allow_punct is TRUE), and FALSE otherwise. */ bool is_word_mbchar(const char *c, bool allow_punct) { assert(c != NULL); - return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) : - FALSE); + /* The terminating NUL is never word-forming; without this check, an + * empty symbol would match any word_chars string in strstr(). */ + if (*c == '\0') + return FALSE; + + if (is_alnum_mbchar(c)) + return TRUE; + + if (word_chars != NULL && *word_chars != '\0') { + bool wordforming; + char *symbol = charalloc(MB_CUR_MAX + 1); + int symlen = parse_mbchar(c, symbol, NULL); + + symbol[symlen] = '\0'; + wordforming = (strstr(word_chars, symbol) != NULL); + free(symbol); + + return wordforming; + } + + return (allow_punct && is_punct_mbchar(c)); } /* Return the visible representation of control character c. */ diff --git a/src/global.c b/src/global.c index c9135a7f..ca7d4e1e 100644 --- a/src/global.c +++ b/src/global.c @@ -124,6 +124,9 @@ size_t quotelen; #endif #endif +char *word_chars = NULL; + /* Nonalphanumeric characters that also form words. */ + bool nodelay_mode = FALSE; /* Are we checking for a cancel wile doing something? 
*/ @@ -1669,6 +1672,7 @@ void thanks_for_all_the_fish(void) delwin(edit); delwin(bottomwin); + free(word_chars); #ifndef DISABLE_JUSTIFY free(quotestr); #ifdef HAVE_REGEX_H diff --git a/src/nano.c b/src/nano.c index 0d5cc471..93e746dc 100644 --- a/src/nano.c +++ b/src/nano.c @@ -860,6 +860,8 @@ void usage(void) #ifndef NANO_TINY print_opt("-W", "--wordbounds", N_("Detect word boundaries more accurately")); + print_opt("-X", "--wordchars", + N_("Which other characters are word parts")); #endif #ifndef DISABLE_COLOR if (!ISSET(RESTRICTED)) @@ -1995,6 +1997,7 @@ int main(int argc, char **argv) {"smooth", 0, NULL, 'S'}, {"quickblank", 0, NULL, 'U'}, {"wordbounds", 0, NULL, 'W'}, + {"wordchars", 1, NULL, 'X'}, {"autoindent", 0, NULL, 'i'}, {"cut", 0, NULL, 'k'}, {"unix", 0, NULL, 'u'}, @@ -2040,11 +2043,11 @@ int main(int argc, char **argv) while ((optchr = #ifdef HAVE_GETOPT_LONG getopt_long(argc, argv, - "ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$", + "ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$", long_options, NULL) #else getopt(argc, argv, - "ABC:DEFGHIKLNOPQ:RST:UVWY:abcdefghijklmno:pqr:s:tuvwxz$") + "ABC:DEFGHIKLNOPQ:RST:UVWX:Y:abcdefghijklmno:pqr:s:tuvwxz$") #endif ) != -1) { switch (optchr) { @@ -2146,6 +2149,9 @@ int main(int argc, char **argv) case 'W': SET(WORD_BOUNDS); break; + case 'X': + word_chars = mallocstrcpy(word_chars, optarg); + break; #endif #ifndef DISABLE_COLOR case 'Y': @@ -2279,6 +2285,7 @@ int main(int argc, char **argv) #endif #ifndef NANO_TINY char *backup_dir_cpy = backup_dir; + char *word_chars_cpy = word_chars; #endif #ifndef DISABLE_JUSTIFY char *quotestr_cpy = quotestr; @@ -2297,6 +2304,7 @@ int main(int argc, char **argv) #endif #ifndef NANO_TINY backup_dir = NULL; + word_chars = NULL; #endif #ifndef DISABLE_JUSTIFY quotestr = NULL; @@ -2327,6 +2335,10 @@ int main(int argc, char **argv) free(backup_dir); backup_dir = backup_dir_cpy; } + if (word_chars_cpy != NULL) { + free(word_chars); + word_chars = 
word_chars_cpy; + } #endif #ifndef DISABLE_JUSTIFY if (quotestr_cpy != NULL) { diff --git a/src/proto.h b/src/proto.h index 497d985a..23c1204a 100644 --- a/src/proto.h +++ b/src/proto.h @@ -91,7 +91,10 @@ extern size_t quotelen; #endif #endif /* !DISABLE_JUSTIFY */ +extern char *word_chars; + extern bool nodelay_mode; + extern char *answer; extern ssize_t tabsize; diff --git a/src/rcfile.c b/src/rcfile.c index 0e57c68a..c9f6b538 100644 --- a/src/rcfile.c +++ b/src/rcfile.c @@ -102,6 +102,7 @@ static const rcoption rcopts[] = { {"unix", MAKE_IT_UNIX}, {"whitespace", 0}, {"wordbounds", WORD_BOUNDS}, + {"wordchars", 0}, #endif #ifndef DISABLE_COLOR {"titlecolor", 0}, @@ -1177,6 +1178,9 @@ void parse_rcfile(FILE *rcstream if (strcasecmp(rcopts[i].name, "backupdir") == 0) backup_dir = option; else + if (strcasecmp(rcopts[i].name, "wordchars") == 0) + word_chars = option; + else #endif #ifndef DISABLE_SPELLER if (strcasecmp(rcopts[i].name, "speller") == 0) diff --git a/src/utils.c b/src/utils.c index 4b661501..470f15bb 100644 --- a/src/utils.c +++ b/src/utils.c @@ -294,8 +294,8 @@ bool is_separate_word(size_t position, size_t length, const char *buf) * word isn't a non-punctuation "word" character, and if we're at * the end of the line or the character after the word isn't a * non-punctuation "word" character, we have a whole word. */ - retval = (position == 0 || !is_word_mbchar(before, FALSE)) && - (word_end == strlen(buf) || !is_word_mbchar(after, FALSE)); + retval = (position == 0 || !is_alnum_mbchar(before)) && + (word_end == strlen(buf) || !is_alnum_mbchar(after)); free(before); free(after); -- GitLab