Commit 67287088 authored by David Lawrence Ramsey
Browse files

Detect words more accurately by taking punctuation into account, and convert all word-detecting functions to use the same wrapper function for ease of maintenance.


git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@2640 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
parent d16d0bfb
Showing with 58 additions and 19 deletions
+58 -19
...@@ -65,6 +65,12 @@ CVS code - ...@@ -65,6 +65,12 @@ CVS code -
this is disabled when NANO_SMALL is defined. New functions this is disabled when NANO_SMALL is defined. New functions
do_word_count() and do_next_word_void(); changes to do_word_count() and do_next_word_void(); changes to
shortcut_init() and do_next_word(). (DLR) shortcut_init() and do_next_word(). (DLR)
- Detect words more accurately by taking punctuation into
account, and convert all word-detecting functions to use the
same wrapper function for ease of maintenance. New functions
is_punct_mbchar() and is_word_mbchar(); changes to
do_next_word(), do_prev_word(), is_whole_word(),
do_statusbar_next_word(), and do_statusbar_prev_word(). (DLR)
- chars.c: - chars.c:
make_mbstring() make_mbstring()
- Change erroneous ENABLE_EXTRA #ifdef to NANO_EXTRA to fix a - Change erroneous ENABLE_EXTRA #ifdef to NANO_EXTRA to fix a
...@@ -233,6 +239,7 @@ CVS code - ...@@ -233,6 +239,7 @@ CVS code -
Weinehall) Weinehall)
- Don't refer to the built-in file browser as crappy anymore. - Don't refer to the built-in file browser as crappy anymore.
(DLR) (DLR)
- Check for iswpunct(). (DLR)
- doc/faq.html: - doc/faq.html:
- Update the question about the FAQ to mention the current - Update the question about the FAQ to mention the current
maintainer. (DLR) maintainer. (DLR)
......
...@@ -399,7 +399,7 @@ dnl Checks for functions. ...@@ -399,7 +399,7 @@ dnl Checks for functions.
AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim) AC_CHECK_FUNCS(snprintf vsnprintf isblank strcasecmp strncasecmp strcasestr strnlen getline getdelim)
if test x$enable_utf8 != xno; then if test x$enable_utf8 != xno; then
AC_CHECK_FUNCS(iswalnum mblen mbtowc wctomb wcwidth iswspace iswblank) AC_CHECK_FUNCS(iswalnum iswblank iswpunct iswspace mblen mbtowc wctomb wcwidth)
fi fi
if test x$ac_cv_func_snprintf = xno || test x$ac_cv_func_vsnprintf = xno; then if test x$ac_cv_func_snprintf = xno || test x$ac_cv_func_vsnprintf = xno; then
...@@ -472,12 +472,13 @@ fi ...@@ -472,12 +472,13 @@ fi
if test x$enable_utf8 != xno && \ if test x$enable_utf8 != xno && \
test x$CURSES_LIB_WIDE = xyes && \ test x$CURSES_LIB_WIDE = xyes && \
test x$ac_cv_func_iswalnum = xyes && \ test x$ac_cv_func_iswalnum = xyes && \
test x$ac_cv_func_iswpunct = xyes && \
(test x$ac_cv_func_iswblank = xyes || test x$ac_cv_func_iswspace = xyes) && \
test x$ac_cv_func_mblen = xyes && \ test x$ac_cv_func_mblen = xyes && \
test x$ac_cv_func_mbtowc = xyes && \ test x$ac_cv_func_mbtowc = xyes && \
test x$ac_cv_func_wctomb = xyes && \ test x$ac_cv_func_wctomb = xyes && \
test x$ac_cv_func_wcwidth = xyes && \ test x$ac_cv_func_wcwidth = xyes; then
(test x$ac_cv_func_iswspace = xyes || test x$ac_cv_func_iswblank = xyes); then AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswpunct(), iswblank() or iswspace(), mblen(), mbtowc(), wctomb(), and wcwidth()).])
AC_DEFINE(NANO_WIDE, 1, [Define this if your system has sufficient wide character support (a wide curses library, iswalnum(), iswspace() or iswblank(), mblen(), mbtowc(), wctomb(), and wcwidth()).])
else else
if test x$enable_utf8 = xyes; then if test x$enable_utf8 = xyes; then
AC_MSG_ERROR([ AC_MSG_ERROR([
......
...@@ -146,6 +146,35 @@ bool is_cntrl_mbchar(const char *c) ...@@ -146,6 +146,35 @@ bool is_cntrl_mbchar(const char *c)
return is_cntrl_char((unsigned char)*c); return is_cntrl_char((unsigned char)*c);
} }
/* Multibyte counterpart of ispunct(): report whether the character that
 * starts at c is a punctuation character.  c must not be NULL. */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
    if (!ISSET(NO_UTF8)) {
        wchar_t wide;

        /* Decode the leading multibyte character.  If that fails (or
         * yields a zero length), reset mbtowc()'s conversion state and
         * classify the first byte on its own instead. */
        if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
            mbtowc(NULL, NULL, 0);
            wide = (unsigned char)*c;
        }

        return iswpunct(wide);
    }
#endif

    /* Single-byte handling: the cast keeps the argument inside the range
     * that ispunct() is defined for. */
    return ispunct((unsigned char)*c);
}
/* Report whether the multibyte character starting at c counts as part
 * of a word.  At present a "word" character is any alphanumeric or
 * punctuation character; everything else is not. */
bool is_word_mbchar(const char *c)
{
    bool in_word = is_alnum_mbchar(c);

    /* Only fall back to the punctuation test when the alphanumeric test
     * has already failed. */
    if (!in_word)
        in_word = is_punct_mbchar(c);

    return in_word;
}
/* c is a control character. It displays as ^@, ^?, or ^[ch], where ch /* c is a control character. It displays as ^@, ^?, or ^[ch], where ch
* is c + 64. We return that character. */ * is c + 64. We return that character. */
char control_rep(char c) char control_rep(char c)
......
...@@ -1468,7 +1468,7 @@ bool do_next_word(bool allow_update) ...@@ -1468,7 +1468,7 @@ bool do_next_word(bool allow_update)
/* If we've found it, stop moving forward through the current /* If we've found it, stop moving forward through the current
* line. */ * line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
/* If we haven't found it, then we've started on a word, so set /* If we haven't found it, then we've started on a word, so set
* started_on_word to TRUE. */ * started_on_word to TRUE. */
...@@ -1489,7 +1489,7 @@ bool do_next_word(bool allow_update) ...@@ -1489,7 +1489,7 @@ bool do_next_word(bool allow_update)
/* If we've found it, stop moving forward through the /* If we've found it, stop moving forward through the
* current line. */ * current line. */
if (is_alnum_mbchar(char_mb)) if (is_word_mbchar(char_mb))
break; break;
current_x += char_mb_len; current_x += char_mb_len;
...@@ -1546,7 +1546,7 @@ void do_prev_word(void) ...@@ -1546,7 +1546,7 @@ void do_prev_word(void)
/* If we've found it, stop moving backward through the current /* If we've found it, stop moving backward through the current
* line. */ * line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
if (current_x == 0) if (current_x == 0)
...@@ -1569,7 +1569,7 @@ void do_prev_word(void) ...@@ -1569,7 +1569,7 @@ void do_prev_word(void)
/* If we've found it, stop moving backward through the /* If we've found it, stop moving backward through the
* current line. */ * current line. */
if (is_alnum_mbchar(char_mb)) if (is_word_mbchar(char_mb))
break; break;
if (current_x == 0) if (current_x == 0)
...@@ -1608,7 +1608,7 @@ void do_prev_word(void) ...@@ -1608,7 +1608,7 @@ void do_prev_word(void)
/* If we've found it, stop moving backward through the /* If we've found it, stop moving backward through the
* current line. */ * current line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
if (current_x == 0) if (current_x == 0)
......
...@@ -173,6 +173,8 @@ bool is_cntrl_char(int c); ...@@ -173,6 +173,8 @@ bool is_cntrl_char(int c);
bool is_cntrl_wchar(wint_t wc); bool is_cntrl_wchar(wint_t wc);
#endif #endif
bool is_cntrl_mbchar(const char *c); bool is_cntrl_mbchar(const char *c);
bool is_punct_mbchar(const char *c);
bool is_word_mbchar(const char *c);
char control_rep(char c); char control_rep(char c);
#ifdef NANO_WIDE #ifdef NANO_WIDE
wchar_t control_wrep(wchar_t c); wchar_t control_wrep(wchar_t c);
......
...@@ -273,11 +273,11 @@ bool is_whole_word(size_t pos, const char *buf, const char *word) ...@@ -273,11 +273,11 @@ bool is_whole_word(size_t pos, const char *buf, const char *word)
parse_mbchar(buf + word_end, r, NULL, NULL); parse_mbchar(buf + word_end, r, NULL, NULL);
/* If we're at the beginning of the line or the character before the /* If we're at the beginning of the line or the character before the
* word isn't an alphanumeric character, and if we're at the end of * word isn't a "word" character, and if we're at the end of the
* the line or the character after the word isn't an alphanumeric * line or the character after the word isn't a "word" character, we
* character, we have a whole word. */ * have a whole word. */
retval = (pos == 0 || !is_alnum_mbchar(p)) && retval = (pos == 0 || !is_word_mbchar(p)) &&
(word_end == strlen(buf) || !is_alnum_mbchar(r)); (word_end == strlen(buf) || !is_word_mbchar(r));
free(p); free(p);
free(r); free(r);
......
...@@ -1917,7 +1917,7 @@ void do_statusbar_next_word(void) ...@@ -1917,7 +1917,7 @@ void do_statusbar_next_word(void)
/* If we've found it, stop moving forward through the current /* If we've found it, stop moving forward through the current
* line. */ * line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
statusbar_x += char_mb_len; statusbar_x += char_mb_len;
...@@ -1933,7 +1933,7 @@ void do_statusbar_next_word(void) ...@@ -1933,7 +1933,7 @@ void do_statusbar_next_word(void)
/* If we've found it, stop moving forward through the current /* If we've found it, stop moving forward through the current
* line. */ * line. */
if (is_alnum_mbchar(char_mb)) if (is_word_mbchar(char_mb))
break; break;
statusbar_x += char_mb_len; statusbar_x += char_mb_len;
...@@ -1960,7 +1960,7 @@ void do_statusbar_prev_word(void) ...@@ -1960,7 +1960,7 @@ void do_statusbar_prev_word(void)
/* If we've found it, stop moving backward through the current /* If we've found it, stop moving backward through the current
* line. */ * line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
if (statusbar_x == 0) if (statusbar_x == 0)
...@@ -1982,7 +1982,7 @@ void do_statusbar_prev_word(void) ...@@ -1982,7 +1982,7 @@ void do_statusbar_prev_word(void)
/* If we've found it, stop moving backward through the current /* If we've found it, stop moving backward through the current
* line. */ * line. */
if (is_alnum_mbchar(char_mb)) if (is_word_mbchar(char_mb))
break; break;
if (statusbar_x == 0) if (statusbar_x == 0)
...@@ -2005,7 +2005,7 @@ void do_statusbar_prev_word(void) ...@@ -2005,7 +2005,7 @@ void do_statusbar_prev_word(void)
/* If we've found it, stop moving backward through the /* If we've found it, stop moving backward through the
* current line. */ * current line. */
if (!is_alnum_mbchar(char_mb)) if (!is_word_mbchar(char_mb))
break; break;
if (statusbar_x == 0) if (statusbar_x == 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment