chars.c 20.2 KB
Newer Older
1
/**************************************************************************
2
 *   chars.c  --  This file is part of GNU nano.                          *
3
 *                                                                        *
4
5
 *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
 *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
6
7
 *   Copyright (C) 2016 Benno Schulenberg                                 *
 *                                                                        *
8
9
10
11
 *   GNU nano is free software: you can redistribute it and/or modify     *
 *   it under the terms of the GNU General Public License as published    *
 *   by the Free Software Foundation, either version 3 of the License,    *
 *   or (at your option) any later version.                               *
12
 *                                                                        *
13
14
15
16
 *   GNU nano is distributed in the hope that it will be useful,          *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty          *
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.              *
 *   See the GNU General Public License for more details.                 *
17
18
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
19
 *   along with this program.  If not, see http://www.gnu.org/licenses/.  *
20
21
22
 *                                                                        *
 **************************************************************************/

23
#include "proto.h"
24

25
#include <string.h>
26
27
#include <ctype.h>

28
#ifdef ENABLE_UTF8
29
#ifdef HAVE_WCHAR_H
30
31
#include <wchar.h>
#endif
32
#ifdef HAVE_WCTYPE_H
33
34
#include <wctype.h>
#endif
35

36
37
38
39
40
41
42
43
44
45
46
47
48
49
static bool use_utf8 = FALSE;
	/* Whether we've enabled UTF-8 support.  Set by utf8_init() and
	 * read through using_utf8(). */

/* Enable UTF-8 support: from now on the functions in this file that
 * test use_utf8 take their multibyte code path. */
void utf8_init(void)
{
    use_utf8 = TRUE;
}

/* Is UTF-8 support enabled?  Return TRUE once utf8_init() has been
 * called, and FALSE otherwise. */
bool using_utf8(void)
{
    return use_utf8;
}
Benno Schulenberg's avatar
Benno Schulenberg committed
50
#endif /* ENABLE_UTF8 */
51

52
/* Append at most len2 bytes of str2 to the first len1 bytes of str1,
 * resizing str1 to make room.  The second string is freed, and the
 * (possibly moved) first string is returned. */
char *addstrings(char* str1, size_t len1, char* str2, size_t len2)
{
    char *joined = charealloc(str1, len1 + len2 + 1);
    char *tail = joined + len1;

    /* Terminate the first part, so that strncat() appends right there. */
    *tail = '\0';
    strncat(tail, str2, len2);

    free(str2);

    return joined;
}

64
65
#ifndef HAVE_ISBLANK
/* This function is equivalent to isblank(). */
66
bool nisblank(int c)
67
{
68
    return isspace(c) && (c == '\t' || !is_cntrl_char(c));
69
}
70
#endif
71

72
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
73
/* This function is equivalent to iswblank(). */
74
bool niswblank(wchar_t wc)
75
{
76
    return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc));
77
}
78
#endif
79

80
/* Return TRUE when c survives truncation to a single byte unchanged
 * (that is: when it lies in the byte range), and FALSE otherwise. */
bool is_byte(int c)
{
    unsigned int as_given = (unsigned int)c;
    unsigned char low_byte = (unsigned char)c;

    return (as_given == low_byte);
}

86
87
88
89
90
/* Put mbtowc() back into its initial conversion state, by calling it
 * with a NULL string.  The result of that call is of no interest.
 * The callers in this file invoke this after a failed conversion. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/* The multibyte counterpart of isalpha(): tell whether the character
 * that starts at c is alphabetic.  An undecodable sequence is not. */
bool is_alpha_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int consumed = mbtowc(&wide, c, MB_CUR_MAX);

	/* On a conversion failure, restore the converter's state. */
	if (consumed < 0) {
	    mbtowc_reset();
	    return FALSE;
	}

	return iswalpha(wide);
    }
#endif

    return isalpha((unsigned char)*c);
}

111
112
113
114
115
/* The multibyte counterpart of isalnum(): tell whether the character
 * that starts at c is a letter or a digit.  An undecodable sequence
 * is neither. */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int consumed = mbtowc(&wide, c, MB_CUR_MAX);

	/* On a conversion failure, restore the converter's state. */
	if (consumed < 0) {
	    mbtowc_reset();
	    return FALSE;
	}

	return iswalnum(wide);
    }
#endif

    return isalnum((unsigned char)*c);
}

131
132
133
134
135
/* The multibyte counterpart of isblank(): tell whether the character
 * that starts at c is a blank.  An undecodable sequence is not. */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int consumed = mbtowc(&wide, c, MB_CUR_MAX);

	/* On a conversion failure, restore the converter's state. */
	if (consumed < 0) {
	    mbtowc_reset();
	    return FALSE;
	}

	return iswblank(wide);
    }
#endif

    return isblank((unsigned char)*c);
}

151
152
153
154
155
156
157
/* A restricted iscntrl(): return TRUE only for the control codes below
 * the space character (0 through 31).  High-bit control characters and
 * DEL are not recognized here. */
bool is_ascii_cntrl_char(int c)
{
    const int first_printable = 32;

    return !(c < 0 || c >= first_printable);
}

158
/* An extended iscntrl(): besides the low control codes and DEL, also
 * recognize the high-bit control characters. */
bool is_cntrl_char(int c)
{
    /* For byte values, bits 5 and 6 are both clear exactly in the
     * ranges 0x00..0x1F and 0x80..0x9F. */
    bool low_or_high_control = ((c & 0x60) == 0);

    return (low_or_high_control || c == 127);
}

/* The multibyte counterpart of iscntrl(), which additionally recognizes
 * the control characters that have their high bit set. */
bool is_cntrl_mbchar(const char *c)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* The single-byte control codes below 32, and DEL. */
	if ((c[0] & 0xE0) == 0 || c[0] == 127)
	    return TRUE;

	/* The two-byte sequences 0xC2 0x80 up to 0xC2 0x9F.  The second
	 * byte is looked at only when the first byte is 0xC2. */
	return ((signed char)c[0] == -62 && (signed char)c[1] < -96);
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}

179
180
/* The multibyte counterpart of ispunct(): tell whether the character
 * that starts at c is punctuation.  An undecodable sequence is not. */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int consumed = mbtowc(&wide, c, MB_CUR_MAX);

	/* On a conversion failure, restore the converter's state. */
	if (consumed < 0) {
	    mbtowc_reset();
	    return FALSE;
	}

	return iswpunct(wide);
    }
#endif

    return ispunct((unsigned char)*c);
}

199
200
201
/* Return TRUE when the given multibyte character c is a word-forming
 * character (that is: alphanumeric, or specified in wordchars, or
 * punctuation when allow_punct is TRUE), and FALSE otherwise. */
202
203
bool is_word_mbchar(const char *c, bool allow_punct)
{
204
205
206
    if (*c == '\0')
	return FALSE;

207
208
209
210
    if (is_alnum_mbchar(c))
	return TRUE;

    if (word_chars != NULL && *word_chars != '\0') {
211
	char symbol[mb_cur_max() + 1];
212
213
214
	int symlen = parse_mbchar(c, symbol, NULL);

	symbol[symlen] = '\0';
215
	return (strstr(word_chars, symbol) != NULL);
216
217
218
    }

    return (allow_punct && is_punct_mbchar(c));
219
220
}

221
/* Return the visible representation of control character c. */
222
char control_rep(const signed char c)
223
{
224
    if (c == DEL_CODE)
225
	return '?';
226
227
228
229
    else if (c == -97)
	return '=';
    else if (c < 0)
	return c + 224;
230
231
232
233
    else
	return c + 64;
}

234
/* Return the visible representation of multibyte control character c. */
235
char control_mbrep(const char *c, bool isdata)
236
{
237
238
    /* An embedded newline is an encoded NUL if it is data. */
    if (*c == '\n' && (isdata || as_an_at))
239
240
	return '@';

241
#ifdef ENABLE_UTF8
242
    if (use_utf8) {
243
	if ((unsigned char)c[0] < 128)
244
	    return control_rep(c[0]);
245
	else
246
	    return control_rep(c[1]);
247
    } else
248
#endif
249
	return control_rep(*c);
250
251
}

252
253
/* Determine the number of bytes in the (multibyte) character at c.
 * Return -1 when the byte sequence cannot be decoded, and the byte
 * count minus 8 when it decodes to an invalid codepoint; in both of
 * these cases *width is left untouched.  Otherwise return the byte
 * count and store the character's column width in *width.  When UTF-8
 * is not in use, simply return 1 without setting *width. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;
	int nbytes = mbtowc(&wide, c, MB_CUR_MAX);

	/* The sequence is invalid: restore the converter's state. */
	if (nbytes < 0) {
	    mbtowc_reset();
	    return -1;
	}

	/* The sequence is well-formed but the codepoint is invalid. */
	if (!is_valid_unicode(wide))
	    return nbytes - 8;

	/* For an unassigned codepoint, assume a width of one. */
	columns = wcwidth(wide);
	*width = (columns < 0) ? 1 : columns;

	return nbytes;
    }
#endif

    return 1;
}

286
287
288
289
290
/* The multibyte counterpart of wcwidth(): return the number of columns
 * that the character at c occupies.  A width of one is reported for an
 * undecodable sequence, for a character for which wcwidth() returns -1,
 * and whenever UTF-8 is not in use. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;

	if (mbtowc(&wide, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    return 1;
	}

	columns = wcwidth(wide);

	return (columns == -1) ? 1 : columns;
    }
#endif

    return 1;
}

312
/* Return the largest number of bytes that a single character can
 * occupy: MB_CUR_MAX when UTF-8 is in use, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    return (use_utf8 ? MB_CUR_MAX : 1);
#else
    return 1;
#endif
}

323
324
325
326
/* Convert the Unicode value in chr to a multibyte character, if possible.
 * If the conversion succeeds, return the (dynamically allocated) multibyte
 * character and store its length in *chr_mb_len.  Otherwise, return an
 * undefined (dynamically allocated) multibyte character and a length of
 * zero.  When UTF-8 is not in use, the result is the single byte that
 * holds the low-order bits of chr.  The caller frees the result. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

#ifdef ENABLE_UTF8
    if (use_utf8) {
	chr_mb = charalloc(MB_CUR_MAX);
	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

	/* Reject invalid Unicode characters, and put the converter
	 * back into its initial state. */
	if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
	    IGNORE_CALL_RESULT(wctomb(NULL, 0));
	    *chr_mb_len = 0;
	}
    } else
#endif
    {
	/* Convert the value to a byte first: copying the first byte in
	 * memory of the long would pick the high-order byte on a
	 * big-endian machine. */
	char byte = (char)chr;

	*chr_mb_len = 1;
	chr_mb = mallocstrncpy(NULL, &byte, 1);
    }

    return chr_mb;
}

/* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
353
 * col isn't NULL, add the character's width (in columns) to it. */
354
int parse_mbchar(const char *buf, char *chr, size_t *col)
355
{
356
    int length;
357
358
359

    assert(buf != NULL);

360
#ifdef ENABLE_UTF8
361
    if (use_utf8) {
362
	/* Get the number of bytes in the multibyte character. */
363
	length = mblen(buf, MB_CUR_MAX);
364

365
	/* When the multibyte sequence is invalid, only take the first byte. */
366
	if (length <= 0) {
367
	    IGNORE_CALL_RESULT(mblen(NULL, 0));
368
	    length = 1;
369
	}
370

371
	/* When requested, store the multibyte character in chr. */
372
373
	if (chr != NULL) {
	    int i;
374

375
	    for (i = 0; i < length; i++)
376
377
378
		chr[i] = buf[i];
	}

379
	/* When requested, add the width of the character to col. */
380
	if (col != NULL) {
381
	    /* If we have a tab, compute its width in columns based on the
382
383
384
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
385
386
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
387
	    else if (is_cntrl_mbchar(buf)) {
388
		*col += 2;
389
	    /* If we have a normal character, get its width normally. */
390
391
392
	    } else
		*col += mbwidth(buf);
	}
393
    } else
394
#endif
395
    {
396
	/* A byte character is one byte long. */
397
	length = 1;
398

399
	/* When requested, store the byte character in chr. */
400
401
402
	if (chr != NULL)
	    *chr = *buf;

403
	/* When requested, add the width of the character to col. */
404
	if (col != NULL) {
405
	    /* If we have a tab, compute its width in columns using the
406
407
408
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
409
410
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
411
412
413
414
415
416
417
418
	    else if (is_cntrl_char((unsigned char)*buf))
		*col += 2;
	    /* If we have a normal character, it's one column wide. */
	    else
		(*col)++;
	}
    }

419
    return length;
420
}
421
422
423
424
425

/* Return the index in buf where the multibyte character that precedes
 * position pos begins.  (For a pos of zero, the result is zero.) */
size_t move_mbleft(const char *buf, size_t pos)
{
    size_t reach, probe, step = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* Characters cannot be parsed backward, so begin as far back as a
     * single character could possibly reach (but not before the start
     * of buf), and then parse forward until pos is reached or passed. */
    reach = mb_cur_max();
    probe = (reach > pos) ? 0 : pos - reach;

    while (probe < pos) {
	step = parse_mbchar(buf + probe, NULL, NULL);
	probe += step;
    }

    /* Undo the last step, to arrive at the start of the last character
     * that was parsed. */
    return probe - step;
}

/* Return the index in buf just past the multibyte character that
 * begins at position pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    int char_len = parse_mbchar(buf + pos, NULL, NULL);

    return pos + char_len;
}
452
453
454
455
456

#ifndef HAVE_STRCASECMP
/* This function is equivalent to strcasecmp(). */
int nstrcasecmp(const char *s1, const char *s2)
{
457
    return strncasecmp(s1, s2, HIGHEST_POSITIVE);
458
459
460
461
462
463
}
#endif

/* This function is equivalent to strcasecmp() for multibyte strings. */
int mbstrcasecmp(const char *s1, const char *s2)
{
464
    return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE);
465
466
467
468
469
470
}

#ifndef HAVE_STRNCASECMP
/* Stand-in for strncasecmp(): compare at most n bytes of s1 and s2
 * while ignoring case.  Return zero when they match, and otherwise the
 * difference between the first pair of lowercased bytes that differ.
 * Bytes are treated as unsigned char, because tolower() is undefined
 * for negative values and high-bit bytes should sort above ASCII. */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
    if (s1 == s2)
	return 0;

    for (; n > 0; s1++, s2++, n--) {
	int lower1 = tolower((unsigned char)*s1);
	int lower2 = tolower((unsigned char)*s2);

	/* Stop at the first difference, or when both strings end. */
	if (lower1 != lower2 || *s1 == '\0')
	    return lower1 - lower2;
    }

    return 0;
}
#endif

483
/* The multibyte counterpart of strncasecmp(): compare at most n
 * characters of s1 and s2 while ignoring case.  Where one or both of
 * the strings contain an undecodable sequence, the raw bytes are
 * compared instead; when those are equal, the string with the invalid
 * sequence sorts after the one with the valid character. */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide1, wide2;

	for (; *s1 != '\0' && *s2 != '\0' && n > 0; n--) {
	    bool bad1, bad2;

	    bad1 = (mbtowc(&wide1, s1, MB_CUR_MAX) < 0);
	    if (bad1)
		mbtowc_reset();

	    bad2 = (mbtowc(&wide2, s2, MB_CUR_MAX) < 0);
	    if (bad2)
		mbtowc_reset();

	    if (!bad1 && !bad2) {
		int difference = towlower(wide1) - towlower(wide2);

		if (difference != 0)
		    return difference;
	    } else {
		if (*s1 != *s2)
		    return (unsigned char)*s1 - (unsigned char)*s2;

		if (bad1 != bad2)
		    return (bad1 ? 1 : -1);
	    }

	    /* Step to the next character in each of the strings. */
	    s1 += move_mbright(s1, 0);
	    s2 += move_mbright(s2, 0);
	}

	/* When the limit was reached, the strings count as equal. */
	if (n == 0)
	    return 0;

	return ((unsigned char)*s1 - (unsigned char)*s2);
    }
#endif

    return strncasecmp(s1, s2, n);
}

#ifndef HAVE_STRCASESTR
528
/* Stand-in for strcasestr(): find the first occurrence of needle in
 * haystack while ignoring case.  Return a pointer to the match, or
 * NULL when there is none.  An empty needle matches at the start. */
char *nstrcasestr(const char *haystack, const char *needle)
{
    const char *here;
    size_t needle_len;

    if (*needle == '\0')
	return (char *)haystack;

    needle_len = strlen(needle);

    /* Try each position of the haystack in turn. */
    for (here = haystack; *here != '\0'; here++) {
	if (strncasecmp(here, needle, needle_len) == 0)
	    return (char *)here;
    }

    return NULL;
}
#endif

549
/* The multibyte counterpart of strcasestr(): find the first occurrence
 * of needle in haystack while ignoring case.  Return a pointer to the
 * match, or NULL when there is none. */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	const char *here = haystack;
	size_t needle_len;

	if (*needle == '\0')
	    return (char *)haystack;

	/* The needle's length is counted in characters, not bytes. */
	needle_len = mbstrlen(needle);

	/* Try every character position of the haystack in turn. */
	while (*here != '\0') {
	    if (mbstrncasecmp(here, needle, needle_len) == 0)
		return (char *)here;

	    here += move_mbright(here, 0);
	}

	return NULL;
    }
#endif

    return (char *) strcasestr(haystack, needle);
}

574
/* This function is equivalent to strstr(), except in that it scans the
 * string in reverse, starting at pointer (which must point into
 * haystack, possibly at its terminating NUL).  Return a pointer to the
 * last match that begins at or before pointer, or NULL when there is
 * none.  An empty needle matches at pointer itself. */
char *revstrstr(const char *haystack, const char *needle,
	const char *pointer)
{
    size_t needle_len = strlen(needle);
    size_t tail_len = strlen(pointer);

    if (needle_len == 0)
	return (char *)pointer;

    if (strlen(haystack) < needle_len)
	return NULL;

    /* When fewer than needle_len bytes remain after pointer, step back
     * to the last position where a whole needle could still fit. */
    if (tail_len < needle_len)
	pointer -= needle_len - tail_len;

    if (pointer < haystack)
	return NULL;

    /* Check for the head of the haystack before stepping back, so that
     * no pointer to before the haystack is ever formed. */
    for (;;) {
	if (strncmp(pointer, needle, needle_len) == 0)
	    return (char *)pointer;

	if (pointer == haystack)
	    return NULL;

	pointer--;
    }
}

/* This function is equivalent to strcasestr(), except in that it scans
 * the string in reverse, starting at rev_start (which must point into
 * haystack, possibly at its terminating NUL).  Return a pointer to the
 * last match that begins at or before rev_start, or NULL when there is
 * none.  An empty needle matches at rev_start itself. */
char *revstrcasestr(const char *haystack, const char *needle, const char
	*rev_start)
{
    size_t rev_start_len, needle_len;

    if (*needle == '\0')
	return (char *)rev_start;

    needle_len = strlen(needle);

    if (strlen(haystack) < needle_len)
	return NULL;

    if (rev_start < haystack)
	return NULL;

    rev_start_len = strlen(rev_start);

    /* Check for the head of the haystack before stepping back, so that
     * no pointer to before the haystack is ever formed. */
    for (;; rev_start--, rev_start_len++) {
	/* A match is possible only when enough bytes follow. */
	if (rev_start_len >= needle_len && strncasecmp(rev_start,
		needle, needle_len) == 0)
	    return (char *)rev_start;

	if (rev_start == haystack)
	    return NULL;
    }
}
625
626

/* The multibyte counterpart of strcasestr() that scans in reverse:
 * starting at rev_start and moving toward the head of haystack, find
 * an occurrence of needle while ignoring case.  Return a pointer to the
 * match, or NULL when there is none. */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
	char *rev_start)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t tail_len, needle_len;

	if (*needle == '\0')
	    return (char *)rev_start;

	/* All lengths here are counted in characters, not bytes. */
	needle_len = mbstrlen(needle);

	if (mbstrlen(haystack) < needle_len)
	    return NULL;

	tail_len = mbstrlen(rev_start);

	for (;;) {
	    /* A match is possible only when enough characters follow. */
	    bool fits = (tail_len >= needle_len);

	    if (fits && mbstrncasecmp(rev_start, needle, needle_len) == 0)
		return (char *)rev_start;

	    /* At the head of the haystack, there is nothing left to try. */
	    if (rev_start == haystack)
		break;

	    rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
	    tail_len++;
	}

	return NULL;
    }
#endif

    return revstrcasestr(haystack, needle, rev_start);
}
661

662
663
664
665
666
667
/* The multibyte counterpart of strlen(): count the characters in s,
 * by calling mbstrnlen() with the largest possible limit. */
size_t mbstrlen(const char *s)
{
    const size_t unlimited = (size_t)-1;

    return mbstrnlen(s, unlimited);
}

668
669
670
671
672
673
#ifndef HAVE_STRNLEN
/* Stand-in for strnlen(): return the length of s, but at most maxlen.
 * The limit is checked before each byte is read, so that (like the
 * real strnlen()) no byte at or beyond s[maxlen] is ever accessed. */
size_t nstrnlen(const char *s, size_t maxlen)
{
    size_t n = 0;

    while (n < maxlen && s[n] != '\0')
	n++;

    return n;
}
#endif

/* The multibyte counterpart of strnlen(): count the characters in s,
 * stopping after maxlen of them. */
size_t mbstrnlen(const char *s, size_t maxlen)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t count = 0;

	/* Step over one whole character at a time. */
	while (*s != '\0' && maxlen > 0) {
	    s += move_mbright(s, 0);
	    maxlen--;
	    count++;
	}

	return count;
    }
#endif

    return strnlen(s, maxlen);
}
697

698
#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
699
/* This function is equivalent to strchr() for multibyte strings: find
 * the first occurrence in s of the multibyte character at c.  Return a
 * pointer to it, or NULL when it does not occur.  An undecodable
 * sequence is compared by its first byte, and matches only another
 * undecodable sequence. */
char *mbstrchr(const char *s, const char *c)
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	bool bad_c_mb = FALSE;
	char symbol[MB_CUR_MAX];
	wchar_t ws, wc;

	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    wc = (unsigned char)*c;
	    bad_c_mb = TRUE;
	}

	while (*s != '\0') {
	    /* The validity of each character is judged afresh: an invalid
	     * sequence must not taint the valid characters after it. */
	    bool bad_s_mb = FALSE;
	    int sym_len = parse_mbchar(s, symbol, NULL);

	    if (mbtowc(&ws, symbol, sym_len) < 0) {
		mbtowc_reset();
		ws = (unsigned char)*s;
		bad_s_mb = TRUE;
	    }

	    if (bad_s_mb == bad_c_mb && ws == wc)
		return (char *)s;

	    s += sym_len;
	}

	/* The end of the string was reached without finding a match. */
	return NULL;
    } else
#endif
	return (char *) strchr(s, *c);
}
741
#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
742

743
744
745
746
747
#ifndef NANO_TINY
/* The multibyte counterpart of strpbrk(): find the first character in
 * s that also occurs in accept.  Return a pointer to it, or NULL when
 * there is no such character. */
char *mbstrpbrk(const char *s, const char *accept)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Skip over the characters that are not in the accepted set. */
	while (*s != '\0' && mbstrchr(accept, s) == NULL)
	    s += move_mbright(s, 0);

	if (*s == '\0')
	    return NULL;

	return (char *)s;
    }
#endif

    return (char *) strpbrk(s, accept);
}

/* This function is equivalent to strpbrk(), except in that it scans the
 * string in reverse, starting at rev_start (which must point into s,
 * possibly at its terminating NUL).  Return a pointer to the last byte
 * at or before rev_start that occurs in accept, or NULL when there is
 * none. */
char *revstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

    /* The terminating NUL itself is never a candidate: skip it. */
    if (*rev_start == '\0') {
	if (rev_start == s)
	   return NULL;
	rev_start--;
    }

    if (rev_start < s)
	return NULL;

    /* Check for the head of the string before stepping back, so that
     * no pointer to before the string is ever formed. */
    for (;;) {
	if (strchr(accept, *rev_start) != NULL)
	    return (char *)rev_start;

	if (rev_start == s)
	    return NULL;

	rev_start--;
    }
}

/* The multibyte counterpart of strpbrk() that scans in reverse:
 * starting at rev_start and moving toward the head of s, find a
 * character that also occurs in accept.  Return a pointer to it, or
 * NULL when there is no such character. */
char *mbrevstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* When starting at the terminating NUL, first step back to the
	 * last real character -- if there is one. */
	if (*rev_start == '\0') {
	    if (rev_start == s)
		return NULL;
	    rev_start = s + move_mbleft(s, rev_start - s);
	}

	/* Move back one character at a time until one is accepted. */
	while (mbstrchr(accept, rev_start) == NULL) {
	    /* At the head of the string, there is nothing left to try. */
	    if (rev_start == s)
		return NULL;

	    rev_start = s + move_mbleft(s, rev_start - s);
	}

	return (char *)rev_start;
    }
#endif

    return revstrpbrk(s, accept, rev_start);
}
#endif /* !NANO_TINY */

812
#if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
/* Return TRUE if the string s contains one or more blank characters,
 * and FALSE otherwise.  Each byte is passed to isblank() as an unsigned
 * char, because the <ctype.h> functions are undefined for negative
 * values (which high-bit bytes are where plain char is signed). */
bool has_blank_chars(const char *s)
{
    /* Skip ahead to the first blank, if there is one. */
    while (*s != '\0' && !isblank((unsigned char)*s))
	s++;

    return (*s != '\0');
}

/* Return TRUE if the multibyte string s contains one or more blank
 * multibyte characters, and FALSE otherwise. */
bool has_blank_mbchars(const char *s)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Examine each character in place.  Copying it to a scratch
	 * buffer first would leave the bytes after the copy
	 * uninitialized, and decoding could then read those bytes. */
	for (; *s != '\0'; s += move_mbright(s, 0)) {
	    if (is_blank_mbchar(s))
		return TRUE;
	}

	return FALSE;
    } else
#endif
	return has_blank_chars(s);
}
845
#endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */
846

847
#ifdef ENABLE_UTF8
848
/* Return TRUE if wc is valid Unicode, and FALSE otherwise.  Invalid are:
 * values outside 0..0x10FFFF, the range 0xD800..0xDFFF, the range
 * 0xFDD0..0xFDEF, and every value whose low sixteen bits are 0xFFFE
 * or 0xFFFF. */
bool is_valid_unicode(wchar_t wc)
{
    bool in_range = (0 <= wc && wc <= 0x10FFFF);
    bool in_d800_block = (0xD800 <= wc && wc <= 0xDFFF);
    bool in_fdd0_block = (0xFDD0 <= wc && wc <= 0xFDEF);
    bool at_plane_end = ((wc & 0xFFFF) > 0xFFFD);

    return (in_range && !in_d800_block && !in_fdd0_block && !at_plane_end);
}
#endif

858
#ifndef DISABLE_NANORC
859
860
861
862
/* Check if the string s is a valid multibyte string.  Return TRUE if it
 * is, and FALSE otherwise. */
bool is_valid_mbstring(const char *s)
{
863
#ifdef ENABLE_UTF8
864
865
866
    if (use_utf8)
	return (mbstowcs(NULL, s, 0) != (size_t)-1);
    else
867
#endif
868
	return TRUE;
869
}
870
#endif /* !DISABLE_NANORC */