chars.c 19.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/* $Id$ */
/**************************************************************************
 *   chars.c                                                              *
 *                                                                        *
 *   Copyright (C) 2005 Chris Allegretta                                  *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 2, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful, but  *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
 *   General Public License for more details.                             *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
 *   02110-1301, USA.                                                     *
 *                                                                        *
 **************************************************************************/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdlib.h>
28
#include <string.h>
29
30
31
32
#include <ctype.h>
#include <assert.h>
#include "proto.h"

33
#ifdef ENABLE_UTF8
34
#ifdef HAVE_WCHAR_H
35
36
#include <wchar.h>
#endif
37
#ifdef HAVE_WCTYPE_H
38
39
#include <wctype.h>
#endif
40
#endif
41

42
43
#ifndef HAVE_ISBLANK
/* This function is equivalent to isblank(). */
44
bool nisblank(int c)
45
{
46
    return isspace(c) && (c == '\t' || !is_cntrl_char(c));
47
}
48
#endif
49

50
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
51
/* This function is equivalent to iswblank(). */
52
bool niswblank(wchar_t wc)
53
{
54
    return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc));
55
}
56
#endif
57

58
59
60
61
62
63
64
/* Report whether c fits in a single byte, i.e. whether converting it to
 * unsigned char leaves its value unchanged. */
bool is_byte(int c)
{
    const unsigned char low_byte = (unsigned char)c;

    return (low_byte == (unsigned int)c);
}

65
66
67
68
69
/* Multibyte counterpart of isalnum(): report whether the multibyte
 * character starting at c is alphanumeric. */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;

	/* If the bytes at c don't convert to a wide character (or c
	 * starts with a null byte), reset the conversion state and
	 * classify the first byte on its own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	return iswalnum(wide);
    }
#endif

    return isalnum((unsigned char)*c);
}

86
87
88
89
90
/* Multibyte counterpart of isblank(): report whether the multibyte
 * character starting at c is a blank. */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;

	/* If the bytes at c don't convert to a wide character (or c
	 * starts with a null byte), reset the conversion state and
	 * classify the first byte on its own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	return iswblank(wide);
    }
#endif

    return isblank((unsigned char)*c);
}

/* Variant of iscntrl() that also accepts the control characters with
 * their high bits set, whether they arrive as 128 through 159 or as
 * the matching negative values -128 through -97. */
bool is_cntrl_char(int c)
{
    /* The ASCII control characters below the space. */
    const bool low_cntrl = (c >= 0 && c < 32);
    /* DEL, followed by the high-bit control characters. */
    const bool high_cntrl = (c >= 127 && c < 160);
    /* The high-bit control characters again, as negative values. */
    const bool negative_cntrl = (c >= -128 && c < -96);

    return (negative_cntrl || low_cntrl || high_cntrl);
}

115
#ifdef ENABLE_UTF8
116
117
118
/* Wide-character variant of iscntrl() that also accepts the wide
 * control characters with their high bits set (128 through 159). */
bool is_cntrl_wchar(wchar_t wc)
{
    /* Control characters all lie in [0, 160); within that range,
     * only [32, 127) is not a control character. */
    return (wc >= 0 && wc < 160 && (wc < 32 || wc >= 127));
}
#endif

125
126
127
128
129
130
131
/* Multibyte counterpart of iscntrl() that also accepts multibyte
 * control characters with their high bits set: report whether the
 * multibyte character starting at c is a control character. */
bool is_cntrl_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;

	/* If the bytes at c don't convert to a wide character (or c
	 * starts with a null byte), reset the conversion state and
	 * classify the first byte on its own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	return is_cntrl_wchar(wide);
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}

148
149
/* Multibyte counterpart of ispunct(): report whether the multibyte
 * character starting at c is punctuation. */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;

	/* If the bytes at c don't convert to a wide character (or c
	 * starts with a null byte), reset the conversion state and
	 * classify the first byte on its own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	return iswpunct(wide);
    }
#endif

    return ispunct((unsigned char)*c);
}

/* Report whether the multibyte character starting at c counts as part
 * of a word.  Alphanumeric characters always do; punctuation
 * characters do only when allow_punct is TRUE. */
bool is_word_mbchar(const char *c, bool allow_punct)
{
    assert(c != NULL);

    return (is_alnum_mbchar(c) || (allow_punct && is_punct_mbchar(c)));
}

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
180
/* c is a control character.  It displays as ^@, ^?, or ^[ch], where ch
181
 * is (c + 64).  We return that character. */
182
char control_rep(char c)
183
184
185
186
187
188
189
190
191
192
{
    /* Treat newlines embedded in a line as encoded nulls. */
    if (c == '\n')
	return '@';
    else if (c == NANO_CONTROL_8)
	return '?';
    else
	return c + 64;
}

193
#ifdef ENABLE_UTF8
194
/* c is a wide control character.  It displays as ^@, ^?, or ^[ch],
195
 * where ch is (c + 64).  We return that wide character. */
196
197
198
199
200
201
202
203
204
205
206
207
208
wchar_t control_wrep(wchar_t wc)
{
    /* Treat newlines embedded in a line as encoded nulls. */
    if (wc == '\n')
	return '@';
    else if (wc == NANO_CONTROL_8)
	return '?';
    else
	return wc + 64;
}
#endif

/* Store in crep the visible multibyte character that stands for the
 * multibyte control character starting at c (see control_rep() and
 * control_wrep()), store its length in bytes in *crep_len, and return
 * crep.  If the conversion back to multibyte form fails, *crep_len is
 * set to zero. */
char *control_mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL && crep != NULL && crep_len != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;
	int rep_len;

	/* If the bytes at c don't convert to a wide character, reset
	 * the conversion state and use the first byte on its own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	rep_len = wctomb(crep, control_wrep(wide));

	/* On failure, reset the conversion state and report an empty
	 * representation. */
	if (rep_len <= 0) {
	    wctomb(NULL, 0);
	    rep_len = 0;
	}

	*crep_len = rep_len;

	return crep;
    }
#endif

    *crep_len = 1;
    *crep = control_rep(*c);

    return crep;
}

/* Multibyte counterpart of wcwidth(): return the number of columns
 * taken up by the multibyte character starting at c.  Outside of UTF-8
 * mode, every character is one column wide. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	wchar_t wide;
	int columns;

	/* If the bytes at c don't convert to a wide character, reset
	 * the conversion state and measure the first byte on its
	 * own. */
	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	columns = wcwidth(wide);

	/* A result of -1 from wcwidth() counts as zero columns. */
	return (columns == -1) ? 0 : columns;
    }
#endif

    return 1;
}

/* Return the largest number of bytes a multibyte character can take
 * up: MB_CUR_MAX in UTF-8 mode, and one otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8))
	return MB_CUR_MAX;
#endif

    return 1;
}

/* Convert the value in chr to a multibyte character with the same
 * wide character value as chr, if possible.  If the conversion
 * succeeds, return the (dynamically allocated) multibyte character and
 * store its length in *chr_mb_len.  Otherwise, return an undefined
 * (dynamically allocated) multibyte character and store a length of
 * zero.  The caller is handed the allocated buffer in either case. */
char *make_mbchar(int chr, int *chr_mb_len)
{
    char *chr_mb;
    /* The value of chr as a single byte.  Copying from this instead
     * of from the address of chr itself keeps the result from
     * depending on where the low-order byte of an int is stored. */
    char chr_byte = (char)chr;

    assert(chr_mb_len != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	chr_mb = charalloc(MB_CUR_MAX);
	*chr_mb_len = wctomb(chr_mb, chr);

	/* On failure, reset the conversion state and report an empty
	 * multibyte character. */
	if (*chr_mb_len <= 0) {
	    wctomb(NULL, 0);
	    *chr_mb_len = 0;
	}
    } else {
#endif
	*chr_mb_len = 1;
	chr_mb = mallocstrncpy(NULL, &chr_byte, 1);
#ifdef ENABLE_UTF8
    }
#endif

    return chr_mb;
}

/* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
309
310
311
312
313
 * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte
 * character.  If col isn't NULL, store the new display width in it.  If
 * *str is '\t', we expect col to have the current display width. */
int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
	*col)
314
315
316
317
318
319
320
321
{
    int buf_mb_len;

    assert(buf != NULL);

    if (bad_chr != NULL)
	*bad_chr = FALSE;

322
#ifdef ENABLE_UTF8
323
    if (ISSET(USE_UTF8)) {
324
325
326
327
	/* Get the number of bytes in the multibyte character. */
	buf_mb_len = mblen(buf, MB_CUR_MAX);

	/* If buf contains a null byte or an invalid multibyte
328
329
	 * character, set bad_chr to TRUE (if it contains the latter)
	 * and interpret buf's first byte. */
330
331
	if (buf_mb_len <= 0) {
	    mblen(NULL, 0);
332
	    if (buf_mb_len < 0 && bad_chr != NULL)
333
		*bad_chr = TRUE;
334
	    buf_mb_len = 1;
335
336
337
338
339
	}

	/* Save the multibyte character in chr. */
	if (chr != NULL) {
	    int i;
340

341
342
343
344
345
346
347
348
349
350
351
352
353
	    for (i = 0; i < buf_mb_len; i++)
		chr[i] = buf[i];
	}

	/* Save the column width of the wide character in col. */
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
	    /* If we have a control character, get its width using one
	     * column for the "^" that will be displayed in front of it,
	     * and the width in columns of its visible equivalent as
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
354
	     * returned by control_mbrep(). */
355
	    else if (is_cntrl_mbchar(buf)) {
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
356
		char *ctrl_buf_mb = charalloc(MB_CUR_MAX);
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
		int ctrl_buf_mb_len;

		(*col)++;

		ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb,
			&ctrl_buf_mb_len);

		*col += mbwidth(ctrl_buf_mb);

		free(ctrl_buf_mb);
	    /* If we have a normal character, get its width in columns
	     * normally. */
	    } else
		*col += mbwidth(buf);
	}
    } else {
#endif
	/* Get the number of bytes in the byte character. */
	buf_mb_len = 1;

	/* Save the byte character in chr. */
	if (chr != NULL)
	    *chr = *buf;

	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
	    /* If we have a control character, it's two columns wide:
	     * one column for the "^" that will be displayed in front of
	     * it, and one column for its visible equivalent as returned
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
389
	     * by control_mbrep(). */
390
391
392
393
394
395
	    else if (is_cntrl_char((unsigned char)*buf))
		*col += 2;
	    /* If we have a normal character, it's one column wide. */
	    else
		(*col)++;
	}
396
#ifdef ENABLE_UTF8
397
398
399
400
401
    }
#endif

    return buf_mb_len;
}
402
403
404
405
406
407
408

/* Return the index in buf of the start of the multibyte character that
 * comes before the one at pos.  For a pos of zero, zero is returned. */
size_t move_mbleft(const char *buf, size_t pos)
{
    /* The start of the multibyte character currently being looked
     * at. */
    size_t scan = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* Multibyte characters can only be parsed going forward, so walk
     * from the start of buf until the next character would reach or
     * pass pos.  This takes O(pos) steps. */
    for (;;) {
	size_t step = (size_t)parse_mbchar(buf + scan, NULL, NULL,
		NULL);

	if (pos - scan <= step)
	    break;

	scan += step;
    }

    return scan;
}

/* Return the index in buf of the start of the multibyte character that
 * comes after the one at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    /* The number of bytes taken up by the character at pos. */
    const int mb_len = parse_mbchar(buf + pos, NULL, NULL, NULL);

    return pos + mb_len;
}
432
433
434
435
436

#ifndef HAVE_STRCASECMP
/* Replacement for strcasecmp(), built when HAVE_STRCASECMP is not
 * defined: compare s1 and s2 while ignoring case, with no limit on how
 * many characters are looked at. */
int nstrcasecmp(const char *s1, const char *s2)
{
    /* The largest value of size_t, so that the length never cuts the
     * comparison short. */
    const size_t no_limit = (size_t)-1;

    return strncasecmp(s1, s2, no_limit);
}
#endif

/* Multibyte counterpart of strcasecmp(): compare the multibyte strings
 * s1 and s2 while ignoring case, with no limit on how many characters
 * are looked at. */
int mbstrcasecmp(const char *s1, const char *s2)
{
    /* The largest value of size_t, so that the length never cuts the
     * comparison short. */
    const size_t no_limit = (size_t)-1;

    return mbstrncasecmp(s1, s2, no_limit);
}

#ifndef HAVE_STRNCASECMP
/* Replacement for strncasecmp(), built when HAVE_STRNCASECMP is not
 * defined: compare at most n characters of s1 and s2 while ignoring
 * case.  Return zero if they match, and otherwise the difference
 * between the first pair of lowercased characters that differ.  The
 * characters are compared as unsigned char values. */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
    assert(s1 != NULL && s2 != NULL);

    for (; n > 0; n--, s1++, s2++) {
	/* tolower() is only defined for values that fit in an
	 * unsigned char (and EOF), so don't hand it a plain char,
	 * which may be negative. */
	const int c1 = tolower((unsigned char)*s1);
	const int c2 = tolower((unsigned char)*s2);

	if (c1 != c2)
	    return c1 - c2;

	/* Both strings ended at the same place. */
	if (c1 == '\0')
	    break;
    }

    return 0;
}
#endif

/* Multibyte counterpart of strncasecmp(): compare at most n multibyte
 * characters of s1 and s2 while ignoring case.  Return zero if they
 * match, and otherwise the difference between the first pair of
 * lowercased wide characters that differ.  A string that ends early
 * compares as having a null character at that point, so a proper
 * prefix of the other string sorts before it. */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	char *s1_mb, *s2_mb;
	/* These start out equal, so that they are never read
	 * uninitialized. */
	wchar_t ws1 = 0, ws2 = 0;

	assert(s1 != NULL && s2 != NULL);

	s1_mb = charalloc(MB_CUR_MAX);
	s2_mb = charalloc(MB_CUR_MAX);

	while (n > 0) {
	    int s1_mb_len, s2_mb_len;

	    /* Decode the current character of each string.  At the
	     * end of a string, parse_mbchar() hands back the null
	     * byte, which decodes to a wide character of zero. */
	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);

	    if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws1 = (unsigned char)*s1_mb;
	    }

	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);

	    if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws2 = (unsigned char)*s2_mb;
	    }

	    /* Stop at the end of either string or at the first
	     * difference, leaving ws1 and ws2 set to the pair of
	     * characters that decides the result. */
	    if (*s1 == '\0' || *s2 == '\0' || towlower(ws1) !=
		towlower(ws2))
		break;

	    s1 += s1_mb_len;
	    s2 += s2_mb_len;
	    n--;
	}

	free(s1_mb);
	free(s2_mb);

	/* If all n characters were used up, everything matched. */
	return (n > 0) ? towlower(ws1) - towlower(ws2) : 0;
    } else
#endif
	return strncasecmp(s1, s2, n);
}

#ifndef HAVE_STRCASESTR
/* Replacement for strcasestr(), built when HAVE_STRCASESTR is not
 * defined.  It was adapted from mutt's mutt_stristr() function.
 * Return a pointer to the first place in haystack where needle occurs
 * when case is ignored, or NULL if there is no such place.  An empty
 * needle matches at the start of haystack. */
const char *nstrcasestr(const char *haystack, const char *needle)
{
    assert(haystack != NULL && needle != NULL);

    /* Deal with the empty needle here, since the loop below never
     * looks at an empty haystack. */
    if (*needle == '\0')
	return haystack;

    for (; *haystack != '\0'; haystack++) {
	const char *r = haystack, *q = needle;

	/* tolower() is only defined for values that fit in an
	 * unsigned char (and EOF), so don't hand it a plain char,
	 * which may be negative. */
	for (; *q != '\0' && tolower((unsigned char)*r) ==
		tolower((unsigned char)*q); r++, q++)
	    ;

	/* The whole needle matched at this position. */
	if (*q == '\0')
	    return haystack;
    }

    return NULL;
}
#endif

532
533
534
/* Multibyte counterpart of strcasestr(): return a pointer to the first
 * place in the multibyte string haystack where the multibyte string
 * needle occurs when case is ignored, or NULL if there is no such
 * place.  An empty needle matches at the start of haystack. */
const char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	char *r_mb, *q_mb;
	wchar_t wr, wq;
	bool found_needle = FALSE;

	assert(haystack != NULL && needle != NULL);

	/* Deal with the empty needle here, since the loop below never
	 * looks at an empty haystack. */
	if (*needle == '\0')
	    return haystack;

	r_mb = charalloc(MB_CUR_MAX);
	q_mb = charalloc(MB_CUR_MAX);

	while (*haystack != '\0') {
	    const char *r = haystack, *q = needle;
	    int r_mb_len, q_mb_len;

	    /* Compare needle against the text at this position, one
	     * multibyte character at a time. */
	    while (*q != '\0') {
		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);

		if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wr = (unsigned char)*r;
		}

		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);

		if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wq = (unsigned char)*q;
		}

		if (towlower(wr) != towlower(wq))
		    break;

		r += r_mb_len;
		q += q_mb_len;
	    }

	    /* The whole needle matched at this position. */
	    if (*q == '\0') {
		found_needle = TRUE;
		break;
	    }

	    haystack += move_mbright(haystack, 0);
	}

	free(r_mb);
	free(q_mb);

	return found_needle ? haystack : NULL;
    } else
#endif
	return strcasestr(haystack, needle);
}

587
#if !defined(NANO_SMALL) || !defined(DISABLE_TABCOMP)
588
/* This function is equivalent to strstr(), except in that it scans the
 * string in reverse, starting at rev_start.  Return a pointer to the
 * last place at or before rev_start where needle occurs in haystack,
 * or NULL if there is no such place.  rev_start must point into
 * haystack (its terminating null byte included). */
const char *revstrstr(const char *haystack, const char *needle, const
	char *rev_start)
{
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    /* A starting point before the haystack leaves nothing to scan. */
    if (rev_start < haystack)
	return NULL;

    for (;;) {
	const char *r = rev_start, *q = needle;

	while (*q != '\0' && *r == *q) {
	    r++;
	    q++;
	}

	/* The whole needle matched at this position. */
	if (*q == '\0')
	    return rev_start;

	/* Stop at the first character, instead of stepping the pointer
	 * to before the start of haystack, which C doesn't allow. */
	if (rev_start == haystack)
	    return NULL;

	rev_start--;
    }
}
607
#endif /* !NANO_SMALL || !DISABLE_TABCOMP */
608

609
#ifndef NANO_SMALL
610
/* This function is equivalent to strcasestr(), except in that it scans
 * the string in reverse, starting at rev_start.  Return a pointer to
 * the last place at or before rev_start where needle occurs in
 * haystack when case is ignored, or NULL if there is no such place.
 * rev_start must point into haystack (its terminating null byte
 * included). */
const char *revstrcasestr(const char *haystack, const char *needle,
	const char *rev_start)
{
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    /* A starting point before the haystack leaves nothing to scan. */
    if (rev_start < haystack)
	return NULL;

    for (;;) {
	const char *r = rev_start, *q = needle;

	/* tolower() is only defined for values that fit in an
	 * unsigned char (and EOF), so don't hand it a plain char,
	 * which may be negative. */
	while (*q != '\0' && tolower((unsigned char)*r) ==
		tolower((unsigned char)*q)) {
	    r++;
	    q++;
	}

	/* The whole needle matched at this position. */
	if (*q == '\0')
	    return rev_start;

	/* Stop at the first character, instead of stepping the pointer
	 * to before the start of haystack, which C doesn't allow. */
	if (rev_start == haystack)
	    return NULL;

	rev_start--;
    }
}
629
630
631
632
633
634
635

/* Multibyte counterpart of strcasestr() that scans the string in
 * reverse, starting at rev_start.  Return a pointer to the last place
 * at or before rev_start where the multibyte string needle occurs in
 * the multibyte string haystack when case is ignored, or NULL if there
 * is no such place. */
const char *mbrevstrcasestr(const char *haystack, const char *needle,
	const char *rev_start)
{
#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	char *r_mb = charalloc(MB_CUR_MAX);
	char *q_mb = charalloc(MB_CUR_MAX);
	wchar_t wr, wq;
	/* Where the needle was found, if it was found at all. */
	const char *match = NULL;

	assert(haystack != NULL && needle != NULL && rev_start != NULL);

	for (;;) {
	    const char *r = rev_start, *q = needle;
	    int r_mb_len, q_mb_len;

	    /* Compare needle against the text at this position, one
	     * multibyte character at a time. */
	    while (*q != '\0') {
		r_mb_len = parse_mbchar(r, r_mb, NULL, NULL);

		if (mbtowc(&wr, r_mb, r_mb_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wr = (unsigned char)*r;
		}

		q_mb_len = parse_mbchar(q, q_mb, NULL, NULL);

		if (mbtowc(&wq, q_mb, q_mb_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wq = (unsigned char)*q;
		}

		if (towlower(wr) != towlower(wq))
		    break;

		r += r_mb_len;
		q += q_mb_len;
	    }

	    /* The whole needle matched at this position. */
	    if (*q == '\0') {
		match = rev_start;
		break;
	    }

	    /* We've tried the beginning of the line, so give up. */
	    if (rev_start == haystack)
		break;

	    /* Otherwise, step back by one multibyte character. */
	    rev_start = haystack + move_mbleft(haystack, rev_start -
		haystack);
	}

	free(r_mb);
	free(q_mb);

	return match;
    }
#endif

    return revstrcasestr(haystack, needle, rev_start);
}
691
#endif /* !NANO_SMALL */
692

693
694
695
696
697
698
/* Multibyte counterpart of strlen(): return the number of multibyte
 * characters in s, with no limit on the count. */
size_t mbstrlen(const char *s)
{
    /* The largest value of size_t, so that the limit never cuts the
     * count short. */
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}

699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
#ifndef HAVE_STRNLEN
/* Replacement for strnlen(), built when HAVE_STRNLEN is not defined:
 * return the length of s, or maxlen if s is longer than that. */
size_t nstrnlen(const char *s, size_t maxlen)
{
    const char *scan;

    assert(s != NULL);

    /* Walk forward until the end of the string or until maxlen
     * characters have been passed, whichever comes first. */
    scan = s;

    while (maxlen > 0 && *scan != '\0') {
	scan++;
	maxlen--;
    }

    return (size_t)(scan - s);
}
#endif

/* Multibyte counterpart of strnlen(): return the number of multibyte
 * characters in s, or maxlen if s holds more than that. */
size_t mbstrnlen(const char *s, size_t maxlen)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	size_t count;

	for (count = 0; *s != '\0'; count++, maxlen--) {
	    /* The number of bytes in the current character. */
	    const int step = parse_mbchar(s, NULL, NULL, NULL);

	    /* Stop once maxlen characters have been counted. */
	    if (maxlen == 0)
		break;

	    s += step;
	}

	return count;
    }
#endif

    return strnlen(s, maxlen);
}
740
741

#ifndef DISABLE_JUSTIFY
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
#ifdef ENABLE_NANORC
/* Return TRUE if the string s contains one or more blank characters,
 * and FALSE otherwise.  An empty string contains none. */
bool has_blank_chars(const char *s)
{
    assert(s != NULL);

    /* Skip ahead to the first blank character, or to the end of the
     * string.  isblank() is only defined for values that fit in an
     * unsigned char (and EOF), so don't hand it a plain char, which
     * may be negative. */
    while (*s != '\0' && !isblank((unsigned char)*s))
	s++;

    /* We only stopped short of the end if we hit a blank. */
    return (*s != '\0');
}

/* Return TRUE if the multibyte string s contains one or more blank
 * multibyte characters, and FALSE otherwise. */
bool has_blank_mbchars(const char *s)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	while (*s != '\0') {
	    /* Classify the character where it sits in s.  A copy of
	     * just its bytes in a scratch buffer would not be
	     * terminated, so is_blank_mbchar() could read past the
	     * copied bytes. */
	    if (is_blank_mbchar(s))
		return TRUE;

	    /* Step over the current multibyte character. */
	    s += parse_mbchar(s, NULL, NULL, NULL);
	}

	return FALSE;
    } else
#endif
	return has_blank_chars(s);
}
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
788
#endif /* ENABLE_NANORC */
789

790
791
792
793
794
/* Multibyte counterpart of strchr(): return a pointer to the first
 * place in the multibyte string s where the multibyte character
 * starting at c occurs, or NULL if it doesn't occur.  In UTF-8 mode,
 * the terminating null byte of s is never matched. */
char *mbstrchr(const char *s, char *c)
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
    if (ISSET(USE_UTF8)) {
	char *s_mb = charalloc(MB_CUR_MAX);
	wchar_t ws, wc;
	/* Whether the character was found.  This is tracked on its own
	 * instead of comparing ws with wc after the loop, since ws is
	 * never set when s is empty. */
	bool found = FALSE;

	/* If the bytes at c don't convert to a wide character, reset
	 * the conversion state and look for the first byte on its
	 * own. */
	if (mbtowc(&wc, c, MB_CUR_MAX) <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	while (*s != '\0') {
	    int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);

	    if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws = (unsigned char)*s;
	    }

	    if (ws == wc) {
		found = TRUE;
		break;
	    }

	    s += s_mb_len;
	}

	free(s_mb);

	return found ? (char *)s : NULL;
    } else
#endif
	return strchr(s, *c);
}
832
#endif /* !DISABLE_JUSTIFY */
833
834
835
836
837
838
839
840
841

#ifdef ENABLE_NANORC
/* Check if the string s is a valid multibyte string.  Return TRUE if it
 * is, and FALSE otherwise. */
bool is_valid_mbstring(const char *s)
{
    assert(s != NULL);

    return 
842
#ifdef ENABLE_UTF8
843
	ISSET(USE_UTF8) ?
844
	(mbstowcs(NULL, s, 0) != (size_t)-1) :
845
846
847
848
849
#endif

	TRUE;
}
#endif /* ENABLE_NANORC */