"src/files.c" did not exist on "64f49c7a99f1a5f5b818f86b637e2e8e40f3cbd7"
chars.c 21.3 KB
Newer Older
1
2
3
/**************************************************************************
 *   chars.c                                                              *
 *                                                                        *
4
5
 *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
 *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
6
7
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
8
 *   the Free Software Foundation; either version 3, or (at your option)  *
9
10
 *   any later version.                                                   *
 *                                                                        *
11
12
13
14
 *   This program is distributed in the hope that it will be useful, but  *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
 *   General Public License for more details.                             *
15
16
17
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
18
19
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
 *   02110-1301, USA.                                                     *
20
21
22
 *                                                                        *
 **************************************************************************/

23
#include "proto.h"
24

25
#include <string.h>
26
27
#include <ctype.h>

28
#ifdef ENABLE_UTF8
29
#ifdef HAVE_WCHAR_H
30
31
#include <wchar.h>
#endif
32
#ifdef HAVE_WCTYPE_H
33
34
#include <wctype.h>
#endif
35

36
37
38
39
40
41
42
43
44
45
46
47
48
49
/* Whether UTF-8 support has been switched on.  It starts out off and
 * is only ever turned on by utf8_init(). */
static bool use_utf8 = FALSE;

/* Switch on UTF-8 support. */
void utf8_init(void)
{
    use_utf8 = TRUE;
}

/* Report whether UTF-8 support has been switched on. */
bool using_utf8(void)
{
    return use_utf8;
}
Benno Schulenberg's avatar
Benno Schulenberg committed
50
#endif /* ENABLE_UTF8 */
51

52
/* Append the first len2 bytes of str2 to the first len1 bytes of str1.
 * The buffer of str1 is reallocated to fit, str2 is freed, and the
 * (possibly moved) combined string is returned. */
char *addstrings(char* str1, size_t len1, char* str2, size_t len2)
{
    char *joined = charealloc(str1, len1 + len2 + 1);

    /* Cut the first part off at len1, so that the second part gets
     * appended exactly there. */
    joined[len1] = '\0';
    strncat(joined + len1, str2, len2);

    free(str2);

    return joined;
}

64
65
#ifndef HAVE_ISBLANK
/* A replacement for isblank(): a character counts as blank when it is
 * whitespace and is either a tab or not a control character. */
bool nisblank(int c)
{
    return (isspace(c) != 0 && !(c != '\t' && is_cntrl_char(c)));
}
70
#endif
71

72
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
73
/* A replacement for iswblank(): a wide character counts as blank when
 * it is whitespace and is either a tab or not reported as a control
 * character by is_cntrl_wchar(). */
bool niswblank(wchar_t wc)
{
    return (iswspace(wc) != 0 && !(wc != '\t' && is_cntrl_wchar(wc)));
}
78
#endif
79

80
/* Return TRUE when c fits in a single unsigned byte, that is: when
 * truncating it to an unsigned char loses nothing; FALSE otherwise. */
bool is_byte(int c)
{
    const unsigned int value = (unsigned int)c;
    const unsigned char low_byte = (unsigned char)c;

    return (value == low_byte);
}

86
87
88
89
90
91
92
93
94
95
/* Call mbtowc() with a null string, which per the C library resets its
 * internal conversion state.  The callers in this file do this after
 * mbtowc() has reported an invalid sequence.  The return value of the
 * call is deliberately discarded. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}

/* Call wctomb() with a null buffer, which per the C library resets its
 * internal conversion state.  make_mbchar() does this after a failed
 * or rejected conversion.  The return value of the call is
 * deliberately discarded. */
void wctomb_reset(void)
{
    IGNORE_CALL_RESULT(wctomb(NULL, 0));
}

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/* This function is equivalent to isalpha() for multibyte characters. */
bool is_alpha_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wc;

	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    return 0;
	}

	return iswalpha(wc);
    } else
#endif
	return isalpha((unsigned char)*c);
}

116
117
118
119
120
/* This function is equivalent to isalnum() for multibyte characters. */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

121
#ifdef ENABLE_UTF8
122
    if (use_utf8) {
123
124
	wchar_t wc;

125
126
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
127
	    return 0;
128
	}
129
130
131
132
133
134
135

	return iswalnum(wc);
    } else
#endif
	return isalnum((unsigned char)*c);
}

136
137
138
139
140
/* This function is equivalent to isblank() for multibyte characters. */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

141
#ifdef ENABLE_UTF8
142
    if (use_utf8) {
143
144
	wchar_t wc;

145
146
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
147
	    return 0;
148
	}
149

150
	return iswblank(wc);
151
152
    } else
#endif
153
	return isblank((unsigned char)*c);
154
155
}

156
157
158
159
160
161
162
/* Return whether c is one of the low control codes, 0 through 31.
 * Unlike iscntrl(), this never accepts any other value. */
bool is_ascii_cntrl_char(int c)
{
    return (c >= 0 && c <= 31);
}

163
/* Return whether c is a control character, counting high-bit ones too.
 * A value qualifies when it is 127, or when bits 0x20 and 0x40 are both
 * clear -- for byte values that means 0..31 and 128..159. */
bool is_cntrl_char(int c)
{
    return (c == 127 || (c & 0x60) == 0);
}

/* Return whether the (multibyte) character at c is a control character,
 * counting the high-bit control characters too.  With UTF-8 enabled,
 * those are the two-byte sequences whose first byte is 0xC2 (-62 as a
 * signed char) and whose second byte is below 0xA0 (-96); otherwise
 * the first byte is simply handed to is_cntrl_char(). */
bool is_cntrl_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	const bool low_control = ((c[0] & 0xE0) == 0 || c[0] == 127);

	/* Look at the second byte only when the first one asks for it. */
	return (low_control ||
		((signed char)c[0] == -62 && (signed char)c[1] < -96));
    }
#endif
    return is_cntrl_char((unsigned char)*c);
}

186
187
/* This function is equivalent to ispunct() for multibyte characters. */
bool is_punct_mbchar(const char *c)
188
189
190
{
    assert(c != NULL);

191
#ifdef ENABLE_UTF8
192
    if (use_utf8) {
193
194
	wchar_t wc;

195
196
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
197
	    return 0;
198
	}
199

200
	return iswpunct(wc);
201
202
    } else
#endif
203
204
205
	return ispunct((unsigned char)*c);
}

206
207
208
/* Return TRUE when the given multibyte character c is a word-forming
 * character (that is: alphanumeric, or specified in wordchars, or
 * punctuation when allow_punct is TRUE), and FALSE otherwise. */
209
210
211
212
bool is_word_mbchar(const char *c, bool allow_punct)
{
    assert(c != NULL);

213
214
215
    if (*c == '\0')
	return FALSE;

216
217
218
219
    if (is_alnum_mbchar(c))
	return TRUE;

    if (word_chars != NULL && *word_chars != '\0') {
220
	bool wordforming;
221
222
223
224
	char *symbol = charalloc(MB_CUR_MAX + 1);
	int symlen = parse_mbchar(c, symbol, NULL);

	symbol[symlen] = '\0';
225
226
	wordforming = (strstr(word_chars, symbol) != NULL);
	free(symbol);
227

228
	return wordforming;
229
230
231
    }

    return (allow_punct && is_punct_mbchar(c));
232
233
}

234
/* Return the visible representation of control character c. */
235
char control_rep(const signed char c)
236
{
237
238
    assert(is_cntrl_char(c));

239
    /* An embedded newline is an encoded null. */
240
241
    if (c == '\n')
	return '@';
242
    else if (c == DEL_CODE)
243
	return '?';
244
245
246
247
    else if (c == -97)
	return '=';
    else if (c < 0)
	return c + 224;
248
249
250
251
    else
	return c + 64;
}

252
253
/* Return the printable character that stands for the multibyte control
 * character at c.  With UTF-8 enabled and a first byte outside 0..127,
 * it is the second byte that gets represented; in every other case it
 * is the first byte. */
char control_mbrep(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8 && !(0 <= c[0] && c[0] <= 127))
	return control_rep(c[1]);
#endif
    return control_rep(*c);
}

268
269
/* Assess how many bytes the given (multibyte) character occupies.  Return -1
 * if the byte sequence is invalid, and return the number of bytes minus 8
270
271
272
 * when it encodes an invalid codepoint.  Also, in the second parameter,
 * return the number of columns that the character occupies. */
int length_of_char(const char *c, int *width)
273
{
274
    assert(c != NULL);
275
276

#ifdef ENABLE_UTF8
277
    if (use_utf8) {
278
	wchar_t wc;
279
	int charlen = mbtowc(&wc, c, MB_CUR_MAX);
280

281
282
	/* If the sequence is invalid... */
	if (charlen < 0) {
283
	    mbtowc_reset();
284
	    return -1;
285
	}
286
287
288
289

	/* If the codepoint is invalid... */
	if (!is_valid_unicode(wc))
	    return charlen - 8;
290
291
292
293
294
	else {
	    *width = wcwidth(wc);
	    /* If the codepoint is unassigned, assume a width of one. */
	    if (*width < 0)
		*width = 1;
295
	    return charlen;
296
	}
297
    } else
298
#endif
299
	return 1;
300
301
}

302
303
304
305
306
/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{
    assert(c != NULL);

307
#ifdef ENABLE_UTF8
308
    if (use_utf8) {
309
	wchar_t wc;
310
	int width;
311

312
313
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
314
	    return 1;
315
	}
316
317

	width = wcwidth(wc);
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
318

319
320
	if (width == -1)
	    return 1;
321
322
323
324
325
326
327
328
329
330

	return width;
    } else
#endif
	return 1;
}

/* Return the largest number of bytes that one character can take:
 * MB_CUR_MAX when UTF-8 is enabled, and one otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    if (use_utf8)
	return MB_CUR_MAX;
#endif
    return 1;
}

338
339
/* Convert the value in chr to a multibyte character.  Return a freshly
 * allocated buffer holding the character (not null-terminated), which
 * the caller must free, and store its length in *chr_mb_len.
 *
 * With UTF-8 enabled, chr is treated as a wide character value; when
 * it cannot be converted or is not valid Unicode, *chr_mb_len is set to
 * zero and the contents of the returned buffer are undefined.  Without
 * UTF-8, the result is always the single low-order byte of chr. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

    assert(chr_mb_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	chr_mb = charalloc(MB_CUR_MAX);
	*chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

	/* Reject invalid Unicode characters. */
	if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
	    wctomb_reset();
	    *chr_mb_len = 0;
	}
    } else
#endif
    {
	/* Take the low-order byte by value.  Reading the first byte in
	 * memory of the long itself would fetch the high-order byte on
	 * a big-endian machine. */
	char byte = (char)chr;

	*chr_mb_len = 1;
	chr_mb = mallocstrncpy(NULL, &byte, 1);
    }

    return chr_mb;
}

/* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
371
372
373
 * col isn't NULL, store the new display width in it.  If *buf is '\t',
 * we expect col to have the current display width. */
int parse_mbchar(const char *buf, char *chr, size_t *col)
374
375
376
377
378
{
    int buf_mb_len;

    assert(buf != NULL);

379
#ifdef ENABLE_UTF8
380
    if (use_utf8) {
381
382
383
	/* Get the number of bytes in the multibyte character. */
	buf_mb_len = mblen(buf, MB_CUR_MAX);

384
	/* When the multibyte sequence is invalid, only take the first byte. */
385
	if (buf_mb_len < 0) {
386
	    IGNORE_CALL_RESULT(mblen(NULL, 0));
387
	    buf_mb_len = 1;
388
389
	} else if (buf_mb_len == 0)
	    buf_mb_len++;
390

391
	/* When requested, store the multibyte character in chr. */
392
393
	if (chr != NULL) {
	    int i;
394

395
396
397
398
	    for (i = 0; i < buf_mb_len; i++)
		chr[i] = buf[i];
	}

399
	/* When requested, store the width of the wide character in col. */
400
401
402
403
404
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
405
406
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
407
	    else if (is_cntrl_mbchar(buf)) {
408
		*col += 2;
409
410
411
412
413
	    /* If we have a normal character, get its width in columns
	     * normally. */
	    } else
		*col += mbwidth(buf);
	}
414
    } else
415
#endif
416
    {
417
	/* A byte character is one byte long. */
418
419
	buf_mb_len = 1;

420
	/* When requested, store the byte character in chr. */
421
422
423
	if (chr != NULL)
	    *chr = *buf;

424
	/* When requested, store the width of the wide character in col. */
425
426
427
428
429
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
430
431
	    /* If we have a control character, it's two columns wide: one
	     * column for the "^", and one for the visible character. */
432
433
434
435
436
437
438
439
440
441
	    else if (is_cntrl_char((unsigned char)*buf))
		*col += 2;
	    /* If we have a normal character, it's one column wide. */
	    else
		(*col)++;
	}
    }

    return buf_mb_len;
}
442
443
444
445
446

/* Return the index in buf of the start of the multibyte character that
 * precedes position pos.  For a pos of zero, return zero. */
size_t move_mbleft(const char *buf, size_t pos)
{
    size_t before, char_len = 0;
    const size_t widest = mb_cur_max();

    assert(buf != NULL && pos <= strlen(buf));

    /* There is no library call for stepping back one multibyte
     * character, so begin as far back as a character could possibly
     * start, and walk forward from there. */
    before = (widest > pos) ? 0 : pos - widest;

    while (before < pos) {
	char_len = parse_mbchar(buf + before, NULL, NULL);
	before += char_len;
    }

    /* Undo the last step, to land on the start of the character. */
    return before - char_len;
}

/* Return the index in buf of the start of the multibyte character that
 * follows the one at position pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    const char *current = buf + pos;

    return pos + parse_mbchar(current, NULL, NULL);
}
473
474
475
476
477

#ifndef HAVE_STRCASECMP
/* This function is equivalent to strcasecmp(). */
int nstrcasecmp(const char *s1, const char *s2)
{
478
    return strncasecmp(s1, s2, HIGHEST_POSITIVE);
479
480
481
482
483
484
}
#endif

/* This function is equivalent to strcasecmp() for multibyte strings. */
int mbstrcasecmp(const char *s1, const char *s2)
{
485
    return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE);
486
487
488
489
490
491
}

#ifndef HAVE_STRNCASECMP
/* This function is equivalent to strncasecmp(). */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
492
493
494
    if (s1 == s2)
	return 0;

495
496
    assert(s1 != NULL && s2 != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
497
    for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) {
498
499
500
501
	if (tolower(*s1) != tolower(*s2))
	    break;
    }

502
    return (n > 0) ? tolower(*s1) - tolower(*s2) : 0;
503
504
505
}
#endif

506
/* Compare at most n (multibyte) characters of s1 and s2 while ignoring
 * case.  With UTF-8 enabled, characters are decoded and compared in
 * lowercase; when either of a pair cannot be decoded, their first bytes
 * are compared instead, and with equal first bytes the undecodable one
 * sorts after the decodable one.  Without UTF-8 this is just
 * strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wc1, wc2;

	assert(s1 != NULL && s2 != NULL);

	for (; *s1 != '\0' && *s2 != '\0' && n > 0; n--) {
	    bool bad1, bad2;

	    bad1 = (mbtowc(&wc1, s1, MB_CUR_MAX) < 0);
	    if (bad1)
		mbtowc_reset();

	    bad2 = (mbtowc(&wc2, s2, MB_CUR_MAX) < 0);
	    if (bad2)
		mbtowc_reset();

	    if (!bad1 && !bad2) {
		int difference = towlower(wc1) - towlower(wc2);

		if (difference != 0)
		    return difference;
	    } else {
		/* At least one sequence is invalid: fall back to
		 * comparing the raw first bytes. */
		if (*s1 != *s2)
		    return (unsigned char)*s1 - (unsigned char)*s2;

		if (bad1 != bad2)
		    return (bad1 ? 1 : -1);
	    }

	    s1 += move_mbright(s1, 0);
	    s2 += move_mbright(s2, 0);
	}

	/* When the limit was not reached, one of the strings ended. */
	return (n > 0) ? ((unsigned char)*s1 - (unsigned char)*s2) : 0;
    }
#endif
    return strncasecmp(s1, s2, n);
}

#ifndef HAVE_STRCASESTR
553
/* This function is equivalent to strcasestr(). */
554
char *nstrcasestr(const char *haystack, const char *needle)
555
{
556
    size_t needle_len;
557

558
559
    assert(haystack != NULL && needle != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
560
    if (*needle == '\0')
561
	return (char *)haystack;
562

563
    needle_len = strlen(needle);
564

565
    while (*haystack != '\0') {
566
	if (strncasecmp(haystack, needle, needle_len) == 0)
567
	    return (char *)haystack;
568
569

	haystack++;
570
571
572
573
574
575
    }

    return NULL;
}
#endif

576
/* Return a pointer to the first occurrence of needle in the multibyte
 * string haystack when ignoring case, or NULL when there is none.  An
 * empty needle matches at the start.  Without UTF-8 this is just
 * strcasestr(). */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	const char *spot;
	size_t needle_len;

	assert(haystack != NULL && needle != NULL);

	if (*needle == '\0')
	    return (char *)haystack;

	/* The needle is measured in characters, not in bytes. */
	needle_len = mbstrlen(needle);

	for (spot = haystack; *spot != '\0';
		spot += move_mbright(spot, 0)) {
	    if (mbstrncasecmp(spot, needle, needle_len) == 0)
		return (char *)spot;
	}

	return NULL;
    }
#endif
    return (char *) strcasestr(haystack, needle);
}

603
#if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP)
604
/* This function is equivalent to strstr(), except in that it scans the
605
 * string in reverse, starting at rev_start. */
606
607
char *revstrstr(const char *haystack, const char *needle, const char
	*rev_start)
608
{
609
610
    size_t rev_start_len, needle_len;

611
612
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
613
    if (*needle == '\0')
614
	return (char *)rev_start;
615

616
    needle_len = strlen(needle);
617

618
619
    if (strlen(haystack) < needle_len)
	return NULL;
620

621
622
623
624
625
    rev_start_len = strlen(rev_start);

    for (; rev_start >= haystack; rev_start--, rev_start_len++) {
	if (rev_start_len >= needle_len && strncmp(rev_start, needle,
		needle_len) == 0)
626
	    return (char *)rev_start;
627
628
629
630
    }

    return NULL;
}
631
#endif /* !NANO_TINY || !DISABLE_TABCOMP */
632

633
#ifndef NANO_TINY
634
/* This function is equivalent to strcasestr(), except in that it scans
635
 * the string in reverse, starting at rev_start. */
636
637
char *revstrcasestr(const char *haystack, const char *needle, const char
	*rev_start)
638
{
639
640
    size_t rev_start_len, needle_len;

641
642
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
643
    if (*needle == '\0')
644
	return (char *)rev_start;
645

646
647
648
649
    needle_len = strlen(needle);

    if (strlen(haystack) < needle_len)
	return NULL;
650

651
    rev_start_len = strlen(rev_start);
652

653
654
655
    for (; rev_start >= haystack; rev_start--, rev_start_len++) {
	if (rev_start_len >= needle_len && strncasecmp(rev_start,
		needle, needle_len) == 0)
656
	    return (char *)rev_start;
657
658
659
660
    }

    return NULL;
}
661
662

/* Search backward for needle in the multibyte string haystack while
 * ignoring case, starting at rev_start and stepping back one character
 * at a time.  Return a pointer to the first match found that way, or
 * NULL when there is none.  An empty needle matches at rev_start
 * itself.  Without UTF-8 this is just revstrcasestr(). */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
	char *rev_start)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t rev_start_len, needle_len;

	assert(haystack != NULL && needle != NULL && rev_start != NULL);

	if (*needle == '\0')
	    return (char *)rev_start;

	/* All the lengths here are counted in characters. */
	needle_len = mbstrlen(needle);

	if (mbstrlen(haystack) < needle_len)
	    return NULL;

	rev_start_len = mbstrlen(rev_start);

	for (;;) {
	    /* Compare only where the whole needle can still fit. */
	    if (rev_start_len >= needle_len &&
			mbstrncasecmp(rev_start, needle, needle_len) == 0)
		return (char *)rev_start;

	    /* At the head of the haystack there is nowhere left to go. */
	    if (rev_start == haystack)
		return NULL;

	    rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
	    rev_start_len++;
	}
    }
#endif
    return revstrcasestr(haystack, needle, rev_start);
}
699
#endif /* !NANO_TINY */
700

701
702
703
704
705
706
/* Return the number of (multibyte) characters in the string s, by
 * calling mbstrnlen() with the largest possible limit. */
size_t mbstrlen(const char *s)
{
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}

707
708
709
710
711
712
713
714
#ifndef HAVE_STRNLEN
/* This function is equivalent to strnlen(). */
size_t nstrnlen(const char *s, size_t maxlen)
{
    size_t n = 0;

    assert(s != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
715
    for (; *s != '\0' && maxlen > 0; s++, maxlen--, n++)
716
717
718
719
720
721
722
723
724
725
726
	;

    return n;
}
#endif

/* Return the number of (multibyte) characters in the string s, but
 * never more than maxlen.  Without UTF-8 this is just strnlen(). */
size_t mbstrnlen(const char *s, size_t maxlen)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	size_t count = 0;

	/* Step over one whole character per count. */
	while (*s != '\0' && maxlen > 0) {
	    s += move_mbright(s, 0);
	    maxlen--;
	    count++;
	}

	return count;
    }
#endif
    return strnlen(s, maxlen);
}
740

741
#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
742
/* This function is equivalent to strchr() for multibyte strings. */
743
char *mbstrchr(const char *s, const char *c)
744
745
746
747
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
748
    if (use_utf8) {
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
749
	bool bad_s_mb = FALSE, bad_c_mb = FALSE;
750
751
752
753
	char *s_mb = charalloc(MB_CUR_MAX);
	const char *q = s;
	wchar_t ws, wc;

Benno Schulenberg's avatar
Benno Schulenberg committed
754
	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
755
	    mbtowc_reset();
756
757
758
759
760
	    wc = (unsigned char)*c;
	    bad_c_mb = TRUE;
	}

	while (*s != '\0') {
761
	    int s_mb_len = parse_mbchar(s, s_mb, NULL);
762

763
	    if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
764
		mbtowc_reset();
765
766
767
768
769
770
771
772
773
774
775
776
777
		ws = (unsigned char)*s;
		bad_s_mb = TRUE;
	    }

	    if (bad_s_mb == bad_c_mb && ws == wc)
		break;

	    s += s_mb_len;
	    q += s_mb_len;
	}

	free(s_mb);

778
	if (*s == '\0')
779
780
781
782
783
	    q = NULL;

	return (char *)q;
    } else
#endif
784
	return (char *) strchr(s, *c);
785
}
786
#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
787

788
789
790
791
792
793
794
#ifndef NANO_TINY
/* This function is equivalent to strpbrk() for multibyte strings. */
char *mbstrpbrk(const char *s, const char *accept)
{
    assert(s != NULL && accept != NULL);

#ifdef ENABLE_UTF8
795
    if (use_utf8) {
796
	for (; *s != '\0'; s += move_mbright(s, 0)) {
797
798
799
800
801
802
803
	    if (mbstrchr(accept, s) != NULL)
		return (char *)s;
	}

	return NULL;
    } else
#endif
804
	return (char *) strpbrk(s, accept);
805
806
807
808
809
810
811
812
813
}

/* This function is equivalent to strpbrk(), except in that it scans the
 * string in reverse, starting at rev_start. */
char *revstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

814
815
816
817
818
    if (*rev_start == '\0') {
	if (rev_start == s)
	   return NULL;
	rev_start--;
    }
819

820
821
    for (; rev_start >= s; rev_start--) {
	if (strchr(accept, *rev_start) != NULL)
822
823
824
825
826
827
828
	    return (char *)rev_start;
    }

    return NULL;
}

/* Search backward through the multibyte string s for any character
 * that occurs in accept, starting at rev_start and stepping back one
 * character at a time.  Return a pointer to the first such character
 * found that way, or NULL when there is none.  When rev_start is at
 * the terminating null byte, the search starts one character before
 * it.  Without UTF-8 this is just revstrpbrk(). */
char *mbrevstrpbrk(const char *s, const char *accept, const char
	*rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Never test the terminating null byte itself. */
	if (*rev_start == '\0') {
	    if (rev_start == s)
		return NULL;
	    rev_start = s + move_mbleft(s, rev_start - s);
	}

	for (;;) {
	    if (mbstrchr(accept, rev_start) != NULL)
		return (char *)rev_start;

	    /* At the head of the string there is nowhere left to go. */
	    if (rev_start == s)
		return NULL;

	    rev_start = s + move_mbleft(s, rev_start - s);
	}
    }
#endif
    return revstrpbrk(s, accept, rev_start);
}
#endif /* !NANO_TINY */

859
#if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
/* Return TRUE if the string s contains one or more blank characters,
 * and FALSE otherwise. */
bool has_blank_chars(const char *s)
{
    assert(s != NULL);

    for (; *s != '\0'; s++) {
	if (isblank(*s))
	    return TRUE;
    }

    return FALSE;
}

/* Return TRUE when the multibyte string s contains at least one blank
 * (multibyte) character, and FALSE otherwise.  Without UTF-8 this is
 * just has_blank_chars(). */
bool has_blank_mbchars(const char *s)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	/* Test each character right where it sits in the string.
	 * Copying it to an unterminated scratch buffer first would let
	 * is_blank_mbchar() read stale bytes beyond the copy. */
	for (; *s != '\0'; s += move_mbright(s, 0)) {
	    if (is_blank_mbchar(s))
		return TRUE;
	}

	return FALSE;
    } else
#endif
	return has_blank_chars(s);
}
901
#endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */
902

903
#ifdef ENABLE_UTF8
904
/* Return TRUE when wc is an acceptable Unicode codepoint, and FALSE
 * otherwise.  Acceptable is anything from 0 through 0x10FFFF, except
 * 0xD800..0xDFFF (the surrogates), 0xFDD0..0xFDEF, and every value
 * whose lowest sixteen bits are 0xFFFE or 0xFFFF. */
bool is_valid_unicode(wchar_t wc)
{
    const bool below_surrogates = (0 <= wc && wc <= 0xD7FF);
    const bool above_surrogates = (0xE000 <= wc && wc <= 0xFDCF);
    const bool rest_of_first_plane = (0xFDF0 <= wc && wc <= 0xFFFD);
    const bool in_higher_plane = (0xFFFF < wc && wc <= 0x10FFFF &&
				(wc & 0xFFFF) <= 0xFFFD);

    return (below_surrogates || above_surrogates ||
		rest_of_first_plane || in_higher_plane);
}
#endif

914
#ifndef DISABLE_NANORC
915
916
917
918
919
920
/* Check if the string s is a valid multibyte string.  Return TRUE if it
 * is, and FALSE otherwise. */
bool is_valid_mbstring(const char *s)
{
    assert(s != NULL);

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
921
    return
922
#ifdef ENABLE_UTF8
923
	use_utf8 ? (mbstowcs(NULL, s, 0) != (size_t)-1) :
924
925
926
#endif
	TRUE;
}
927
#endif /* !DISABLE_NANORC */