chars.c 21 KB
Newer Older
1
2
3
/**************************************************************************
 *   chars.c                                                              *
 *                                                                        *
 *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
 *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 3, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful, but  *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
 *   General Public License for more details.                             *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
 *   02110-1301, USA.                                                     *
 *                                                                        *
 **************************************************************************/

23
#include "proto.h"
24

25
#include <string.h>
26
27
#include <ctype.h>

28
#ifdef ENABLE_UTF8
29
#ifdef HAVE_WCHAR_H
30
31
#include <wchar.h>
#endif
32
#ifdef HAVE_WCTYPE_H
33
34
#include <wctype.h>
#endif
35

36
37
38
39
40
41
42
43
44
45
46
47
48
49
static bool use_utf8 = FALSE;
	/* Whether UTF-8 support has been enabled.  It starts out FALSE,
	 * is switched on by utf8_init(), and is consulted by using_utf8()
	 * and by the multibyte helpers in this file. */

/* Enable UTF-8 support: set the file-local flag that makes the
 * multibyte helpers in this file take their wide-character paths. */
void utf8_init(void)
{
    use_utf8 = TRUE;
}

/* Is UTF-8 support enabled?  Return the current value of the flag that
 * utf8_init() sets. */
bool using_utf8(void)
{
    return use_utf8;
}
Benno Schulenberg's avatar
Benno Schulenberg committed
50
#endif /* ENABLE_UTF8 */
51

52
/* Append the first len2 bytes of str2 to the first len1 bytes of str1.
 * The buffer of str1 is resized to hold the result (so the returned
 * pointer replaces str1), and str2 is freed. */
char *addstrings(char *str1, size_t len1, char *str2, size_t len2)
{
    char *joined = charealloc(str1, len1 + len2 + 1);
    char *tail = joined + len1;

    /* Cut the first part off at len1, so that strncat() appends there. */
    *tail = '\0';
    strncat(tail, str2, len2);

    free(str2);

    return joined;
}

64
65
#ifndef HAVE_ISBLANK
/* This function is equivalent to isblank(). */
66
bool nisblank(int c)
67
{
68
    return isspace(c) && (c == '\t' || !is_cntrl_char(c));
69
}
70
#endif
71

72
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
73
/* This function is equivalent to iswblank(). */
74
bool niswblank(wchar_t wc)
75
{
76
    return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc));
77
}
78
#endif
79

80
/* Return TRUE when the value of c fits in a single byte (that is, when
 * truncating it to an unsigned char does not change it), and FALSE
 * otherwise. */
bool is_byte(int c)
{
    const unsigned int value = (unsigned int)c;
    const unsigned char low_byte = (unsigned char)c;

    return (value == low_byte);
}

86
87
88
89
90
91
92
93
94
95
/* Put the conversion state of mbtowc() back into its initial state, by
 * calling it with a null string.  The helpers in this file do this
 * after mbtowc() has reported an invalid sequence.  The result of the
 * call is deliberately discarded. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}

/* Put the conversion state of wctomb() back into its initial state, by
 * calling it with a null buffer.  The result of the call is
 * deliberately discarded. */
void wctomb_reset(void)
{
    IGNORE_CALL_RESULT(wctomb(NULL, 0));
}

96
97
98
99
100
/* Tell whether the multibyte character at c is alphanumeric.  In UTF-8
 * mode the character is decoded and classified with iswalnum(); a
 * sequence that cannot be decoded is never alphanumeric.  Otherwise
 * just the first byte is classified with isalnum(). */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	const int decoded = mbtowc(&wide, c, MB_CUR_MAX);

	if (decoded >= 0)
	    return iswalnum(wide);

	/* Invalid sequence: restore the decoder state and say no. */
	mbtowc_reset();
	return FALSE;
    } else
#endif
	return isalnum((unsigned char)*c);
}

116
117
118
119
120
/* Tell whether the multibyte character at c is a blank.  In UTF-8 mode
 * the character is decoded and classified with iswblank(); a sequence
 * that cannot be decoded is never a blank.  Otherwise just the first
 * byte is classified with isblank(). */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	const int decoded = mbtowc(&wide, c, MB_CUR_MAX);

	if (decoded >= 0)
	    return iswblank(wide);

	/* Invalid sequence: restore the decoder state and say no. */
	mbtowc_reset();
	return FALSE;
    } else
#endif
	return isblank((unsigned char)*c);
}

136
137
138
139
140
141
142
/* A variant of iscntrl() that recognizes only the low control codes:
 * it is true for the values 0 up to and including 31, and false for
 * everything else (including 127 and all high-bit values). */
bool is_ascii_cntrl_char(int c)
{
    return !(c < 0 || c >= 32);
}

143
/* A variant of iscntrl() that also recognizes high-bit control codes:
 * it is true for 127 and for every value in which bits 5 and 6 are both
 * clear -- for byte values, that means 0..31 and 128..159. */
bool is_cntrl_char(int c)
{
    const int printable_bits = 0x60;

    return (c == 127 || (c & printable_bits) == 0);
}

/* Tell whether the multibyte character at c is a control character,
 * also counting control characters that have their high bit set.  In
 * UTF-8 mode the bytes are inspected directly; otherwise the first byte
 * is handed to is_cntrl_char(). */
bool is_cntrl_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	const signed char lead = (signed char)c[0];

	/* The single-byte control codes: 0x00..0x1F and 0x7F. */
	if ((c[0] & 0xE0) == 0 || c[0] == 127)
	    return TRUE;

	/* The two-byte sequences 0xC2 0x80 up to 0xC2 0x9F, which encode
	 * U+0080..U+009F.  The second byte is looked at only when the
	 * first one is 0xC2. */
	return (lead == -62 && (signed char)c[1] < -96);
    } else
#endif
	return is_cntrl_char((unsigned char)*c);
}

166
167
/* Tell whether the multibyte character at c is punctuation.  In UTF-8
 * mode the character is decoded and classified with iswpunct(); a
 * sequence that cannot be decoded is never punctuation.  Otherwise just
 * the first byte is classified with ispunct(). */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	const int decoded = mbtowc(&wide, c, MB_CUR_MAX);

	if (decoded >= 0)
	    return iswpunct(wide);

	/* Invalid sequence: restore the decoder state and say no. */
	mbtowc_reset();
	return FALSE;
    } else
#endif
	return ispunct((unsigned char)*c);
}

186
187
188
/* Return TRUE when the given multibyte character c is a word-forming
 * character (that is: alphanumeric, or specified in wordchars, or
 * punctuation when allow_punct is TRUE), and FALSE otherwise. */
189
190
191
192
bool is_word_mbchar(const char *c, bool allow_punct)
{
    assert(c != NULL);

193
194
195
196
197
198
199
200
201
202
203
204
205
    if (is_alnum_mbchar(c))
	return TRUE;

    if (word_chars != NULL && *word_chars != '\0') {
	char *symbol = charalloc(MB_CUR_MAX + 1);
	int symlen = parse_mbchar(c, symbol, NULL);

	symbol[symlen] = '\0';

	return (strstr(word_chars, symbol) != NULL);
    }

    return (allow_punct && is_punct_mbchar(c));
206
207
}

208
/* Return the visible representation of control character c. */
209
char control_rep(const signed char c)
210
{
211
212
    assert(is_cntrl_char(c));

213
    /* An embedded newline is an encoded null. */
214
215
216
217
    if (c == '\n')
	return '@';
    else if (c == NANO_CONTROL_8)
	return '?';
218
219
220
221
    else if (c == -97)
	return '=';
    else if (c < 0)
	return c + 224;
222
223
224
225
    else
	return c + 64;
}

226
227
/* Return the visible representation of multibyte control character c. */
char control_mbrep(const char *c)
228
{
229
    assert(c != NULL);
230

231
#ifdef ENABLE_UTF8
232
    if (use_utf8) {
233
	if (0 <= c[0] && c[0] <= 127)
234
	    return control_rep(c[0]);
235
	else
236
	    return control_rep(c[1]);
237
    } else
238
#endif
239
	return control_rep(*c);
240
241
}

242
243
/* Determine how many bytes the (multibyte) character at c occupies.
 * Return -1 when the byte sequence is invalid, and return the number of
 * bytes minus 8 when the sequence decodes to an invalid codepoint.  For
 * a valid character, also store in *width the number of columns that it
 * occupies; *width is not touched in any other case.  Outside UTF-8
 * mode, always return 1. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;
	const int nbytes = mbtowc(&wide, c, MB_CUR_MAX);

	/* An invalid sequence leaves the decoder in an unknown state. */
	if (nbytes < 0) {
	    mbtowc_reset();
	    return -1;
	}

	/* A well-formed sequence, but not an acceptable codepoint. */
	if (!is_valid_unicode(wide))
	    return nbytes - 8;

	/* When the codepoint is unassigned, assume a width of one. */
	columns = wcwidth(wide);
	*width = (columns < 0) ? 1 : columns;

	return nbytes;
    } else
#endif
	return 1;
}

276
277
278
279
280
/* Return the number of columns that the multibyte character at c
 * occupies, like wcwidth() does for a wide character.  A sequence that
 * cannot be decoded, and a character for which wcwidth() returns -1,
 * count as one column.  Outside UTF-8 mode the answer is always 1. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;

	if (mbtowc(&wide, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    return 1;
	}

	columns = wcwidth(wide);

	return (columns == -1) ? 1 : columns;
    } else
#endif
	return 1;
}

/* Return the maximum number of bytes that a multibyte character can
 * take up: MB_CUR_MAX in UTF-8 mode, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    if (use_utf8)
	return MB_CUR_MAX;
#endif
    return 1;
}

312
313
/* Convert the Unicode value in chr to a multibyte character with the
 * same wide character value as chr, if possible.  If the conversion
 * succeeds, return the (dynamically allocated) multibyte character and
 * store its length in *chr_mb_len.  Otherwise, return an undefined
 * (dynamically allocated) multibyte character and store a length of
 * zero.  Outside UTF-8 mode, the result is always the single byte that
 * holds the value of chr.  The caller must free the returned buffer.
 * Note that the buffer is not NUL-terminated. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

    assert(chr_mb_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	chr_mb = charalloc(MB_CUR_MAX);
	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

	/* Reject invalid Unicode characters. */
	if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
	    wctomb_reset();
	    *chr_mb_len = 0;
	}
    } else
#endif
    {
	/* Convert the value of chr instead of copying the first byte of
	 * its in-memory representation: the latter is the low-order
	 * byte only on little-endian machines. */
	*chr_mb_len = 1;
	chr_mb = charalloc(1);
	*chr_mb = (char)chr;
    }

    return chr_mb;
}

/* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
345
346
347
 * col isn't NULL, store the new display width in it.  If *buf is '\t',
 * we expect col to have the current display width. */
int parse_mbchar(const char *buf, char *chr, size_t *col)
348
349
350
351
352
{
    int buf_mb_len;

    assert(buf != NULL);

353
#ifdef ENABLE_UTF8
354
    if (use_utf8) {
355
356
357
	/* Get the number of bytes in the multibyte character. */
	buf_mb_len = mblen(buf, MB_CUR_MAX);

358
	/* When the multibyte sequence is invalid, only take the first byte. */
359
	if (buf_mb_len < 0) {
360
	    IGNORE_CALL_RESULT(mblen(NULL, 0));
361
	    buf_mb_len = 1;
362
363
	} else if (buf_mb_len == 0)
	    buf_mb_len++;
364

365
	/* When requested, store the multibyte character in chr. */
366
367
	if (chr != NULL) {
	    int i;
368

369
370
371
372
	    for (i = 0; i < buf_mb_len; i++)
		chr[i] = buf[i];
	}

373
	/* When requested, store the width of the wide character in col. */
374
375
376
377
378
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
379
380
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
381
	    else if (is_cntrl_mbchar(buf)) {
382
		*col += 2;
383
384
385
386
387
	    /* If we have a normal character, get its width in columns
	     * normally. */
	    } else
		*col += mbwidth(buf);
	}
388
    } else
389
#endif
390
    {
391
	/* A byte character is one byte long. */
392
393
	buf_mb_len = 1;

394
	/* When requested, store the byte character in chr. */
395
396
397
	if (chr != NULL)
	    *chr = *buf;

398
	/* When requested, store the width of the wide character in col. */
399
400
401
402
403
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
404
405
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
406
407
408
409
410
411
412
413
414
415
	    else if (is_cntrl_char((unsigned char)*buf))
		*col += 2;
	    /* If we have a normal character, it's one column wide. */
	    else
		(*col)++;
	}
    }

    return buf_mb_len;
}
416
417
418
419
420

/* Return the index in buf of the beginning of the multibyte character
 * before the one at pos.  For a pos of zero, zero is returned. */
size_t move_mbleft(const char *buf, size_t pos)
{
    const size_t reach = mb_cur_max();
    size_t scan, last_len = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* There is no library function to step backward over one multibyte
     * character.  So begin at the farthest point where the previous
     * character could start, and parse forward from there. */
    scan = (reach > pos) ? 0 : pos - reach;

    /* Walk forward until pos is reached or passed, remembering the
     * length of the character that was stepped over last. */
    while (scan < pos) {
	last_len = parse_mbchar(buf + scan, NULL, NULL);
	scan += last_len;
    }

    return scan - last_len;
}

/* Return the index in buf of the beginning of the multibyte character
 * after the one at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    const int step = parse_mbchar(buf + pos, NULL, NULL);

    return pos + step;
}
447
448
449
450
451

#ifndef HAVE_STRCASECMP
/* This function is equivalent to strcasecmp(). */
int nstrcasecmp(const char *s1, const char *s2)
{
452
    return strncasecmp(s1, s2, HIGHEST_POSITIVE);
453
454
455
456
457
458
}
#endif

/* This function is equivalent to strcasecmp() for multibyte strings. */
int mbstrcasecmp(const char *s1, const char *s2)
{
459
    return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE);
460
461
462
463
464
465
}

#ifndef HAVE_STRNCASECMP
/* Stand-in for strncasecmp(): compare at most n bytes of s1 and s2
 * while ignoring case.  Return zero when they are equal, and otherwise
 * the difference between the first two lowercased bytes that differ,
 * with the bytes taken as unsigned char. */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
    if (s1 == s2)
	return 0;

    assert(s1 != NULL && s2 != NULL);

    for (; n > 0; s1++, s2++, n--) {
	/* The <ctype.h> functions are only defined for values that fit
	 * in an unsigned char (and EOF), so never pass a plain char. */
	const int low1 = tolower((unsigned char)*s1);
	const int low2 = tolower((unsigned char)*s2);

	/* Stop at the first difference, or when both strings end. */
	if (low1 != low2 || *s1 == '\0')
	    return low1 - low2;
    }

    return 0;
}
#endif

480
/* Compare at most n multibyte characters of s1 and s2 while ignoring
 * case, like strncasecmp() does for byte strings.  Return zero when
 * they are equal, a negative value when s1 sorts before s2, and a
 * positive value when it sorts after.  A byte that does not start a
 * valid multibyte sequence is compared by its own value, and never
 * equals a valid character. */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	if (s1 == s2)
	    return 0;

	assert(s1 != NULL && s2 != NULL);

	while (*s1 != '\0' && *s2 != '\0' && n > 0) {
	    wchar_t wc1, wc2;
	    bool bad1 = FALSE, bad2 = FALSE;
	    int difference;

	    if (mbtowc(&wc1, s1, MB_CUR_MAX) < 0) {
		mbtowc_reset();
		wc1 = (unsigned char)*s1;
		bad1 = TRUE;
	    }

	    if (mbtowc(&wc2, s2, MB_CUR_MAX) < 0) {
		mbtowc_reset();
		wc2 = (unsigned char)*s2;
		bad2 = TRUE;
	    }

	    difference = (int)towlower(wc1) - (int)towlower(wc2);

	    if (difference != 0)
		return difference;

	    /* An invalid byte whose value happens to coincide with a
	     * valid character is still a mismatch. */
	    if (bad1 != bad2)
		return (bad1 ? 1 : -1);

	    s1 += move_mbright(s1, 0);
	    s2 += move_mbright(s2, 0);
	    n--;
	}

	/* When characters remain to be compared, at least one of the
	 * strings has ended: let the bytes at the current positions
	 * decide, so that a proper prefix sorts first.  (Comparing the
	 * last decoded pair here would wrongly report equality, and
	 * would read unset values when nothing was decoded at all.) */
	return (n > 0) ? ((unsigned char)*s1 - (unsigned char)*s2) : 0;
    } else
#endif
	return strncasecmp(s1, s2, n);
}

#ifndef HAVE_STRCASESTR
519
/* Stand-in for strcasestr(): return a pointer to the first occurrence
 * of needle in haystack, ignoring case, or NULL when there is none.  An
 * empty needle is found at the start of haystack. */
char *nstrcasestr(const char *haystack, const char *needle)
{
    size_t hay_len, want_len;
    const char *spot, *last_spot;

    assert(haystack != NULL && needle != NULL);

    if (*needle == '\0')
	return (char *)haystack;

    hay_len = strlen(haystack);
    want_len = strlen(needle);

    /* A needle that is longer than the haystack cannot occur in it. */
    if (hay_len < want_len)
	return NULL;

    /* Past this position, too few bytes remain for a match. */
    last_spot = haystack + (hay_len - want_len);

    for (spot = haystack; spot <= last_spot; spot++) {
	if (strncasecmp(spot, needle, want_len) == 0)
	    return (char *)spot;
    }

    return NULL;
}
#endif

542
/* Return a pointer to the first occurrence of needle in the multibyte
 * string haystack, ignoring case, or NULL when there is none -- like
 * strcasestr() does for byte strings.  In UTF-8 mode, a match is only
 * accepted at a position where a valid multibyte character starts. */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t chars_left, wanted;

	assert(haystack != NULL && needle != NULL);

	/* An empty needle is found right away. */
	if (*needle == '\0')
	    return (char *)haystack;

	chars_left = mbstrlen(haystack);
	wanted = mbstrlen(needle);

	/* Step through the haystack one character at a time, for as
	 * long as enough characters remain to hold the needle. */
	while (*haystack != '\0' && chars_left >= wanted) {
	    if (mbstrncasecmp(haystack, needle, wanted) == 0 &&
			mblen(haystack, MB_CUR_MAX) > 0)
		return (char *)haystack;

	    haystack += move_mbright(haystack, 0);
	    chars_left--;
	}

	return NULL;
    } else
#endif
	return (char *) strcasestr(haystack, needle);
}

570
#if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP)
571
/* This function is equivalent to strstr(), except in that it scans the
 * string in reverse, starting at rev_start.  It returns a pointer to
 * the match that lies closest to (at or before) rev_start, or NULL when
 * there is none between haystack and rev_start.  An empty needle is
 * found at rev_start itself. */
char *revstrstr(const char *haystack, const char *needle, const char
	*rev_start)
{
    size_t rev_start_len, needle_len;

    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    if (*needle == '\0')
	return (char *)rev_start;

    needle_len = strlen(needle);

    if (strlen(haystack) < needle_len)
	return NULL;

    /* There is nothing to scan when the starting point lies before the
     * haystack. */
    if (rev_start < haystack)
	return NULL;

    rev_start_len = strlen(rev_start);

    for (;;) {
	/* Only compare where enough bytes remain to hold the needle. */
	if (rev_start_len >= needle_len && strncmp(rev_start, needle,
		needle_len) == 0)
	    return (char *)rev_start;

	/* Stop at the head of the haystack, instead of stepping the
	 * pointer to before it (which is undefined behavior). */
	if (rev_start == haystack)
	    return NULL;

	rev_start--;
	rev_start_len++;
    }
}
598
#endif /* !NANO_TINY || !DISABLE_TABCOMP */
599

600
#ifndef NANO_TINY
601
/* This function is equivalent to strcasestr(), except in that it scans
 * the string in reverse, starting at rev_start.  It returns a pointer
 * to the match that lies closest to (at or before) rev_start, or NULL
 * when there is none between haystack and rev_start.  An empty needle
 * is found at rev_start itself. */
char *revstrcasestr(const char *haystack, const char *needle, const char
	*rev_start)
{
    size_t rev_start_len, needle_len;

    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    if (*needle == '\0')
	return (char *)rev_start;

    needle_len = strlen(needle);

    if (strlen(haystack) < needle_len)
	return NULL;

    /* There is nothing to scan when the starting point lies before the
     * haystack. */
    if (rev_start < haystack)
	return NULL;

    rev_start_len = strlen(rev_start);

    for (;;) {
	/* Only compare where enough bytes remain to hold the needle. */
	if (rev_start_len >= needle_len && strncasecmp(rev_start,
		needle, needle_len) == 0)
	    return (char *)rev_start;

	/* Stop at the head of the haystack, instead of stepping the
	 * pointer to before it (which is undefined behavior). */
	if (rev_start == haystack)
	    return NULL;

	rev_start--;
	rev_start_len++;
    }
}
628
629

/* Search backward from rev_start through the multibyte string haystack
 * for needle, ignoring case, and return a pointer to the match or NULL
 * when there is none -- a reverse, multibyte strcasestr().  In UTF-8
 * mode, a match is only accepted at a position where a valid multibyte
 * character starts.  An empty needle is found at rev_start itself. */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
	char *rev_start)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t chars_after, wanted;

	assert(haystack != NULL && needle != NULL && rev_start != NULL);

	if (*needle == '\0')
	    return (char *)rev_start;

	wanted = mbstrlen(needle);

	/* A needle that is longer than the haystack cannot occur in it. */
	if (mbstrlen(haystack) < wanted)
	    return NULL;

	chars_after = mbstrlen(rev_start);

	for (;;) {
	    /* Only compare where enough characters remain. */
	    if (chars_after >= wanted &&
			mbstrncasecmp(rev_start, needle, wanted) == 0 &&
			mblen(rev_start, MB_CUR_MAX) > 0)
		return (char *)rev_start;

	    /* At the head of the haystack, the search has failed. */
	    if (rev_start == haystack)
		return NULL;

	    /* Step back one multibyte character. */
	    rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
	    chars_after++;
	}
    } else
#endif
	return revstrcasestr(haystack, needle, rev_start);
}
667
#endif /* !NANO_TINY */
668

669
670
671
672
673
674
/* Return the number of multibyte characters in s, like strlen() does
 * for bytes.  This is mbstrnlen() with the largest possible limit. */
size_t mbstrlen(const char *s)
{
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}

675
676
677
678
679
680
681
682
#ifndef HAVE_STRNLEN
/* Stand-in for strnlen(): return the length of s in bytes, but not more
 * than maxlen. */
size_t nstrnlen(const char *s, size_t maxlen)
{
    size_t count = 0;

    assert(s != NULL);

    /* Count bytes until the string or the allowance runs out. */
    while (s[count] != '\0' && count < maxlen)
	count++;

    return count;
}
#endif

/* Return the number of multibyte characters in s, but not more than
 * maxlen -- like strnlen() does for bytes.  Outside UTF-8 mode this
 * simply is strnlen(). */
size_t mbstrnlen(const char *s, size_t maxlen)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t count = 0;

	/* Hop from character to character until the string or the
	 * allowance runs out. */
	while (*s != '\0' && count < maxlen) {
	    s += move_mbright(s, 0);
	    count++;
	}

	return count;
    } else
#endif
	return strnlen(s, maxlen);
}
708

709
#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
710
/* Return a pointer to the first occurrence in the multibyte string s of
 * the multibyte character at c, or NULL when it does not occur -- like
 * strchr() does for bytes.  In UTF-8 mode, a byte that does not start a
 * valid multibyte sequence is compared by its own value, and matches
 * only another invalid byte; also, the terminating NUL of s is never
 * matched there. */
char *mbstrchr(const char *s, const char *c)
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	bool bad_c_mb = FALSE;
	wchar_t ws, wc;

	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    wc = (unsigned char)*c;
	    bad_c_mb = TRUE;
	}

	while (*s != '\0') {
	    /* Judge the validity of every character afresh: a single
	     * invalid byte must not taint the characters after it. */
	    bool bad_s_mb = FALSE;
	    int s_mb_len = parse_mbchar(s, NULL, NULL);

	    /* Decode straight from the string, looking only at the bytes
	     * of this character; no scratch copy is needed. */
	    if (mbtowc(&ws, s, s_mb_len) < 0) {
		mbtowc_reset();
		ws = (unsigned char)*s;
		bad_s_mb = TRUE;
	    }

	    if (bad_s_mb == bad_c_mb && ws == wc)
		return (char *)s;

	    s += s_mb_len;
	}

	return NULL;
    } else
#endif
	return (char *) strchr(s, *c);
}
754
#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
755

756
757
758
759
760
761
762
#ifndef NANO_TINY
/* Return a pointer to the first multibyte character in s that also
 * occurs in accept, or NULL when there is no such character -- like
 * strpbrk() does for bytes. */
char *mbstrpbrk(const char *s, const char *accept)
{
    assert(s != NULL && accept != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Look up each character of s in turn in the accepted set. */
	while (*s != '\0') {
	    if (mbstrchr(accept, s) != NULL)
		return (char *)s;

	    s += move_mbright(s, 0);
	}

	return NULL;
    } else
#endif
	return (char *) strpbrk(s, accept);
}

/* This function is equivalent to strpbrk(), except in that it scans the
 * string in reverse, starting at rev_start.  It returns a pointer to
 * the byte closest to (at or before) rev_start that occurs in accept,
 * or NULL when there is none between s and rev_start.  When rev_start
 * points at the terminating NUL, the scan begins one byte earlier. */
char *revstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

    if (*rev_start == '\0') {
	if (rev_start == s)
	   return NULL;
	rev_start--;
    }

    /* There is nothing to scan when the starting point lies before the
     * string. */
    if (rev_start < s)
	return NULL;

    for (;;) {
	if (strchr(accept, *rev_start) != NULL)
	    return (char *)rev_start;

	/* Stop at the head of the string, instead of stepping the
	 * pointer to before it (which is undefined behavior). */
	if (rev_start == s)
	    return NULL;

	rev_start--;
    }
}

/* Search backward from rev_start through the multibyte string s for a
 * character that occurs in accept, and return a pointer to it, or NULL
 * when there is none -- a reverse, multibyte strpbrk().  When rev_start
 * points at the terminating NUL, the scan begins one character
 * earlier. */
char *mbrevstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	const char *spot = rev_start;

	/* Don't try to match the terminating NUL. */
	if (*spot == '\0') {
	    if (spot == s)
		return NULL;
	    spot = s + move_mbleft(s, spot - s);
	}

	for (;;) {
	    if (mbstrchr(accept, spot) != NULL)
		return (char *)spot;

	    /* At the head of the string, the search has failed. */
	    if (spot == s)
		return NULL;

	    /* Step back one multibyte character. */
	    spot = s + move_mbleft(s, spot - s);
	}
    } else
#endif
	return revstrpbrk(s, accept, rev_start);
}
#endif /* !NANO_TINY */

827
#if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
/* Return TRUE if the string s contains one or more blank characters,
 * and FALSE otherwise. */
bool has_blank_chars(const char *s)
{
    assert(s != NULL);

    /* Skip ahead to the first blank, if any.  The <ctype.h> functions
     * are only defined for values that fit in an unsigned char (and
     * EOF), so never pass a plain char. */
    while (*s != '\0' && !isblank((unsigned char)*s))
	s++;

    /* Stopping before the end of the string means a blank was found. */
    return (*s != '\0');
}

/* Return TRUE if the multibyte string s contains one or more blank
 * multibyte characters, and FALSE otherwise. */
bool has_blank_mbchars(const char *s)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Classify each character in place.  The string is terminated,
	 * so there is no need to copy the character out first -- and an
	 * unterminated copy could make the decoder look at bytes that
	 * were never set. */
	for (; *s != '\0'; s += move_mbright(s, 0)) {
	    if (is_blank_mbchar(s))
		return TRUE;
	}

	return FALSE;
    } else
#endif
	return has_blank_chars(s);
}
869
#endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */
870

871
#ifdef ENABLE_UTF8
872
/* Return TRUE if wc is valid Unicode, and FALSE otherwise.  Valid means:
 * in the range 0..0x10FFFF, not in 0xD800..0xDFFF, not in
 * 0xFDD0..0xFDEF, and not one of the last two codes (0x..FFFE and
 * 0x..FFFF) of any block of 65536 codes. */
bool is_valid_unicode(wchar_t wc)
{
    const bool in_range = (0 <= wc && wc <= 0x10FFFF);
    const bool in_d800_block = (0xD800 <= wc && wc <= 0xDFFF);
    const bool in_fdd0_block = (0xFDD0 <= wc && wc <= 0xFDEF);
    const bool at_block_end = ((wc & 0xFFFF) >= 0xFFFE);

    return (in_range && !in_d800_block && !in_fdd0_block && !at_block_end);
}
#endif

882
#ifndef DISABLE_NANORC
883
884
885
886
887
888
/* Check if the string s is a valid multibyte string.  Return TRUE if it
 * is, and FALSE otherwise. */
bool is_valid_mbstring(const char *s)
{
    assert(s != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
889
    return
890
#ifdef ENABLE_UTF8
891
	use_utf8 ? (mbstowcs(NULL, s, 0) != (size_t)-1) :
892
893
894
#endif
	TRUE;
}
895
#endif /* !DISABLE_NANORC */