diff --git a/src/chars.c b/src/chars.c index cd19c4ae355e4bf4aa02a75541ed7488987d8994..2c3c2038b8aaef3e2164d6d7758762a515cab0b9 100644 --- a/src/chars.c +++ b/src/chars.c @@ -93,6 +93,26 @@ void wctomb_reset(void) IGNORE_CALL_RESULT(wctomb(NULL, 0)); } +/* Return whether the multibyte character at c is alphabetic: the multibyte counterpart of isalpha(). In UTF-8 mode the character is decoded with mbtowc() and tested with iswalpha(); otherwise only the first byte is tested with isalpha(). A sequence that cannot be decoded is reported as non-alphabetic. */ +bool is_alpha_mbchar(const char *c) +{ + assert(c != NULL); + +#ifdef ENABLE_UTF8 + if (use_utf8) { + wchar_t wc; + + if (mbtowc(&wc, c, MB_CUR_MAX) < 0) { + mbtowc_reset(); /* NOTE(review): presumably clears the conversion state after the failed decode -- confirm against mbtowc_reset() */ + return 0; /* an undecodable sequence is not a letter */ + } + + return iswalpha(wc); + } else +#endif + return isalpha((unsigned char)*c); /* the cast keeps the argument within the range that the ctype functions accept */ +} + /* This function is equivalent to isalnum() for multibyte characters. */ bool is_alnum_mbchar(const char *c) { diff --git a/src/proto.h b/src/proto.h index d7ae25d8d1066849d73e45f5c3a157a539e236f9..039ceb7f17f2928ee495e32da2f6c8a8f4a3ec14 100644 --- a/src/proto.h +++ b/src/proto.h @@ -183,6 +183,7 @@ bool nisblank(int c); bool niswblank(wchar_t wc); #endif bool is_byte(int c); +bool is_alpha_mbchar(const char *c); bool is_alnum_mbchar(const char *c); bool is_blank_mbchar(const char *c); bool is_ascii_cntrl_char(int c); diff --git a/src/utils.c b/src/utils.c index 470f15bbf07d3d658d17ffb4aa89afb867e9d825..67d90d892589a5f75cb286c836ed766427540f14 100644 --- a/src/utils.c +++ b/src/utils.c @@ -290,12 +290,11 @@ bool is_separate_word(size_t position, size_t length, const char *buf) parse_mbchar(buf + move_mbleft(buf, position), before, NULL); parse_mbchar(buf + word_end, after, NULL); - /* If we're at the beginning of the line or the character before the - * word isn't a non-punctuation "word" character, and if we're at - * the end of the line or the character after the word isn't a - * non-punctuation "word" character, we have a whole word. 
*/ - retval = (position == 0 || !is_alnum_mbchar(before)) && - (word_end == strlen(buf) || !is_alnum_mbchar(after)); + /* If the word starts at the beginning of the line OR the character before + * the word isn't a letter, and if the word ends at the end of the line OR + * the character after the word isn't a letter, we have a whole word. */ + retval = (position == 0 || !is_alpha_mbchar(before)) && + (word_end == strlen(buf) || !is_alpha_mbchar(after)); free(before); free(after);