chars.c 19.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/* $Id$ */
/**************************************************************************
 *   chars.c                                                              *
 *                                                                        *
 *   Copyright (C) 2005 Chris Allegretta                                  *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 2, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful, but  *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
 *   General Public License for more details.                             *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
 *   02110-1301, USA.                                                     *
 *                                                                        *
 **************************************************************************/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdlib.h>
28
#include <string.h>
29
30
31
32
#include <ctype.h>
#include <assert.h>
#include "proto.h"

33
34
#ifdef NANO_WIDE
#ifdef HAVE_WCHAR_H
35
36
#include <wchar.h>
#endif
37
#ifdef HAVE_WCTYPE_H
38
39
#include <wctype.h>
#endif
40
#endif
41

42
43
#ifndef HAVE_ISBLANK
/* Stand-in for isblank() on systems that lack it.  A character counts
 * as blank when isspace() accepts it and it is either a tab or not a
 * control character according to is_cntrl_char(). */
int nisblank(int c)
{
    if (!isspace(c))
	return 0;

    return (c == '\t' || !is_cntrl_char(c));
}
48
#endif
49

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
50
#if !defined(HAVE_ISWBLANK) && defined(NANO_WIDE)
51
/* Stand-in for iswblank() on systems that lack it.  A wide character
 * counts as blank when iswspace() accepts it and it is either a tab or
 * not a control character according to is_cntrl_wchar(). */
int niswblank(wchar_t wc)
{
    if (!iswspace(wc))
	return 0;

    return (wc == '\t' || !is_cntrl_wchar(wc));
}
56
#endif
57

58
59
60
61
62
63
64
/* Check whether c fits in a single byte: return TRUE when c converted
 * to unsigned int equals c converted to unsigned char, and FALSE
 * otherwise. */
bool is_byte(int c)
{
    const unsigned int full_value = (unsigned int)c;
    const unsigned char low_byte = (unsigned char)c;

    return (full_value == low_byte);
}

65
66
67
68
69
70
/* This function is equivalent to isalnum() for multibyte characters. */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
71
    if (ISSET(USE_UTF8)) {
72
73
74
75
76
77
78
79
80
81
82
83
84
85
	wchar_t wc;
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);

	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	return iswalnum(wc);
    } else
#endif
	return isalnum((unsigned char)*c);
}

86
87
88
89
90
91
/* This function is equivalent to isblank() for multibyte characters. */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
92
    if (ISSET(USE_UTF8)) {
93
94
95
96
97
98
99
100
	wchar_t wc;
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);

	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

101
	return iswblank(wc);
102
103
    } else
#endif
104
	return isblank((unsigned char)*c);
105
106
107
108
}

/* Like iscntrl(), but also accept control characters that have their
 * high bit set.  The values treated as control characters are -128 to
 * -97, 0 to 31, and 127 to 159. */
bool is_cntrl_char(int c)
{
    const bool negative_range = (c >= -128 && c < -96);
    const bool low_range = (c >= 0 && c < 32);
    const bool upper_range = (c >= 127 && c < 160);

    return (negative_range || low_range || upper_range);
}

115
116
117
118
#ifdef NANO_WIDE
/* Like iscntrl() for wide characters, but also accept wide control
 * characters that have their high bit set.  The values treated as
 * control characters are 0 to 31 and 127 to 159. */
bool is_cntrl_wchar(wchar_t wc)
{
    const bool low_range = (wc >= 0 && wc < 32);
    const bool upper_range = (wc >= 127 && wc < 160);

    return (low_range || upper_range);
}
#endif

125
126
127
128
129
130
131
132
/* Multibyte counterpart of iscntrl() that also accepts control
 * characters with their high bit set.  In UTF-8 mode, a sequence that
 * mbtowc() cannot convert is classified by the value of its first
 * byte. */
bool is_cntrl_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	wchar_t wide;

	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    /* Reset the conversion state and use the first byte. */
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	return is_cntrl_wchar(wide);
    }
#endif

    return is_cntrl_char((unsigned char)*c);
}

148
149
/* This function is equivalent to ispunct() for multibyte characters. */
bool is_punct_mbchar(const char *c)
150
151
152
153
{
    assert(c != NULL);

#ifdef NANO_WIDE
154
    if (ISSET(USE_UTF8)) {
155
156
157
158
159
160
161
162
	wchar_t wc;
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);

	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

163
	return iswpunct(wc);
164
165
    } else
#endif
166
167
168
169
	return ispunct((unsigned char)*c);
}

/* Return TRUE for a multibyte character found in a word (currently only
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
170
 * an alphanumeric or punctuation character, and only the latter if
171
172
173
174
175
176
177
 * allow_punct is TRUE) and FALSE otherwise. */
bool is_word_mbchar(const char *c, bool allow_punct)
{
    assert(c != NULL);

    return is_alnum_mbchar(c) || (allow_punct ? is_punct_mbchar(c) :
	FALSE);
178
179
}

David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
180
/* c is a control character.  It displays as ^@, ^?, or ^[ch], where ch
181
 * is c + 64.  We return that character. */
182
char control_rep(char c)
183
184
185
186
187
188
189
190
191
192
{
    /* Treat newlines embedded in a line as encoded nulls. */
    if (c == '\n')
	return '@';
    else if (c == NANO_CONTROL_8)
	return '?';
    else
	return c + 64;
}

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#ifdef NANO_WIDE
/* wc is a wide control character, which is displayed as ^@, ^?, or
 * ^[ch].  Return the wide character shown after the caret: '@' for a
 * newline, '?' for NANO_CONTROL_8, and wc + 64 for anything else. */
wchar_t control_wrep(wchar_t wc)
{
    /* A newline embedded in a line is treated as an encoded null. */
    if (wc == '\n')
	return '@';

    if (wc == NANO_CONTROL_8)
	return '?';

    return wc + 64;
}
#endif

/* c is a multibyte control character, which is displayed as ^@, ^?, or
 * ^[ch].  Store the multibyte character shown after the caret in crep
 * and its length in bytes in *crep_len, and return crep.  In UTF-8
 * mode, *crep_len is set to zero when the wide representation cannot
 * be converted back to a multibyte character. */
char *control_mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL && crep != NULL && crep_len != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	wchar_t wide;
	int rep_len;

	if (mbtowc(&wide, c, MB_CUR_MAX) <= 0) {
	    /* Reset the conversion state and use the first byte. */
	    mbtowc(NULL, NULL, 0);
	    wide = (unsigned char)*c;
	}

	rep_len = wctomb(crep, control_wrep(wide));

	if (rep_len <= 0) {
	    wctomb(NULL, 0);
	    rep_len = 0;
	}

	*crep_len = rep_len;

	return crep;
    }
#endif

    *crep = control_rep(*c);
    *crep_len = 1;

    return crep;
}

/* This function is equivalent to wcwidth() for multibyte characters. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef NANO_WIDE
246
    if (ISSET(USE_UTF8)) {
247
248
249
250
251
252
253
254
255
	wchar_t wc;
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX), width;

	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	width = wcwidth(wc);
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
256

257
258
259
260
261
262
263
264
265
266
267
268
	if (width == -1)
	    width++;

	return width;
    } else
#endif
	return 1;
}

/* Return the maximum number of bytes that a multibyte character can
 * take up: MB_CUR_MAX in UTF-8 mode, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef NANO_WIDE
    if (ISSET(USE_UTF8))
	return MB_CUR_MAX;
#endif

    return 1;
}

/* Convert the value in chr to a multibyte character with the same wide
 * character value as chr, if possible.  The length in bytes of the
 * result is stored in *chr_mb_len.  If the conversion succeeds, return
 * the (dynamically allocated) multibyte character.  Otherwise, return
 * an undefined (dynamically allocated) multibyte character and set
 * *chr_mb_len to zero.  Outside UTF-8 mode, the result is always the
 * single byte holding the low-order bits of chr. */
char *make_mbchar(int chr, int *chr_mb_len)
{
    char *chr_mb;
    char chr_byte;

    assert(chr_mb_len != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	chr_mb = charalloc(MB_CUR_MAX);
	*chr_mb_len = wctomb(chr_mb, chr);

	if (*chr_mb_len <= 0) {
	    /* Reset the conversion state after the failure. */
	    wctomb(NULL, 0);
	    *chr_mb_len = 0;
	}
    } else {
#endif
	/* Convert chr by value instead of reading the int through a
	 * char pointer: the first byte of an int in memory is its high
	 * byte on big-endian systems, not its low byte. */
	chr_byte = (char)chr;

	*chr_mb_len = 1;
	chr_mb = mallocstrncpy(NULL, &chr_byte, 1);
#ifdef NANO_WIDE
    }
#endif

    return chr_mb;
}

/* Parse a multibyte character from buf.  Return the number of bytes
 * used.  If chr isn't NULL, store the multibyte character in it.  If
309
310
311
312
313
 * bad_chr isn't NULL, set it to TRUE if we have a bad multibyte
 * character.  If col isn't NULL, store the new display width in it.  If
 * *str is '\t', we expect col to have the current display width. */
int parse_mbchar(const char *buf, char *chr, bool *bad_chr, size_t
	*col)
314
315
316
317
318
319
320
321
{
    int buf_mb_len;

    assert(buf != NULL);

    if (bad_chr != NULL)
	*bad_chr = FALSE;

322
#ifdef NANO_WIDE
323
    if (ISSET(USE_UTF8)) {
324
325
326
327
	/* Get the number of bytes in the multibyte character. */
	buf_mb_len = mblen(buf, MB_CUR_MAX);

	/* If buf contains a null byte or an invalid multibyte
328
329
	 * character, set bad_chr to TRUE (if it contains the latter)
	 * and interpret buf's first byte. */
330
331
	if (buf_mb_len <= 0) {
	    mblen(NULL, 0);
332
	    if (buf_mb_len < 0 && bad_chr != NULL)
333
		*bad_chr = TRUE;
334
	    buf_mb_len = 1;
335
336
337
338
339
	}

	/* Save the multibyte character in chr. */
	if (chr != NULL) {
	    int i;
340

341
342
343
344
345
346
347
348
349
350
351
352
353
	    for (i = 0; i < buf_mb_len; i++)
		chr[i] = buf[i];
	}

	/* Save the column width of the wide character in col. */
	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
	    /* If we have a control character, get its width using one
	     * column for the "^" that will be displayed in front of it,
	     * and the width in columns of its visible equivalent as
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
354
	     * returned by control_mbrep(). */
355
	    else if (is_cntrl_mbchar(buf)) {
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
356
		char *ctrl_buf_mb = charalloc(MB_CUR_MAX);
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
		int ctrl_buf_mb_len;

		(*col)++;

		ctrl_buf_mb = control_mbrep(buf, ctrl_buf_mb,
			&ctrl_buf_mb_len);

		*col += mbwidth(ctrl_buf_mb);

		free(ctrl_buf_mb);
	    /* If we have a normal character, get its width in columns
	     * normally. */
	    } else
		*col += mbwidth(buf);
	}
    } else {
#endif
	/* Get the number of bytes in the byte character. */
	buf_mb_len = 1;

	/* Save the byte character in chr. */
	if (chr != NULL)
	    *chr = *buf;

	if (col != NULL) {
	    /* If we have a tab, get its width in columns using the
	     * current value of col. */
	    if (*buf == '\t')
		*col += tabsize - *col % tabsize;
	    /* If we have a control character, it's two columns wide:
	     * one column for the "^" that will be displayed in front of
	     * it, and one column for its visible equivalent as returned
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
389
	     * by control_mbrep(). */
390
391
392
393
394
395
396
397
398
399
400
401
	    else if (is_cntrl_char((unsigned char)*buf))
		*col += 2;
	    /* If we have a normal character, it's one column wide. */
	    else
		(*col)++;
	}
#ifdef NANO_WIDE
    }
#endif

    return buf_mb_len;
}
402
403
404
405
406
407
408

/* Return the index in buf of the beginning of the multibyte character
 * before the one at pos.  When pos is zero, zero is returned. */
size_t move_mbleft(const char *buf, size_t pos)
{
    /* The index of the character currently being looked at. */
    size_t start = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* No library function steps backward over a multibyte character,
     * so walk forward from the beginning of buf, in O(pos) time, until
     * the next step would reach or pass pos. */
    for (;;) {
	const size_t step = (size_t)parse_mbchar(buf + start, NULL,
		NULL, NULL);

	if (pos - start <= step)
	    return start;

	start += step;
    }
}

/* Return the index in buf of the beginning of the multibyte character
 * after the one at pos, that is, pos plus the length in bytes of the
 * character at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    const int char_len = parse_mbchar(buf + pos, NULL, NULL, NULL);

    return pos + char_len;
}
432
433
434
435
436

#ifndef HAVE_STRCASECMP
/* Stand-in for strcasecmp(): compare s1 and s2 without regard to case
 * by calling strncasecmp() with the largest possible length limit. */
int nstrcasecmp(const char *s1, const char *s2)
{
    const size_t no_limit = (size_t)-1;

    return strncasecmp(s1, s2, no_limit);
}
#endif

/* Multibyte counterpart of strcasecmp(): compare s1 and s2 without
 * regard to case by calling mbstrncasecmp() with the largest possible
 * length limit. */
int mbstrcasecmp(const char *s1, const char *s2)
{
    const size_t no_limit = (size_t)-1;

    return mbstrncasecmp(s1, s2, no_limit);
}

#ifndef HAVE_STRNCASECMP
/* This function is equivalent to strncasecmp(). */
int nstrncasecmp(const char *s1, const char *s2, size_t n)
{
    assert(s1 != NULL && s2 != NULL);

    for (; n > 0 && *s1 != '\0' && *s2 != '\0'; n--, s1++, s2++) {
	if (tolower(*s1) != tolower(*s2))
	    break;
    }

    if (n > 0)
459
	return tolower(*s1) - tolower(*s2);
460
461
462
463
464
465
466
467
468
469
    else
	return 0;
}
#endif

/* Multibyte counterpart of strncasecmp(): compare at most n multibyte
 * characters of s1 and s2 without regard to case.  Return zero when
 * they match, a negative value when s1 sorts first, and a positive
 * value when s2 sorts first.  In UTF-8 mode, a sequence that cannot be
 * converted is compared by the value of its first byte, and never
 * matches a valid character. */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	char *s1_mb, *s2_mb;
	int retval = 0;

	assert(s1 != NULL && s2 != NULL);

	s1_mb = charalloc(MB_CUR_MAX);
	s2_mb = charalloc(MB_CUR_MAX);

	while (n > 0 && *s1 != '\0' && *s2 != '\0') {
	    wchar_t ws1, ws2;
	    bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
	    int s1_mb_len, s2_mb_len;

	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL, NULL);

	    if (mbtowc(&ws1, s1_mb, s1_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws1 = (unsigned char)*s1_mb;
		bad_s1_mb = TRUE;
	    }

	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL, NULL);

	    if (mbtowc(&ws2, s2_mb, s2_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws2 = (unsigned char)*s2_mb;
		bad_s2_mb = TRUE;
	    }

	    retval = towlower(ws1) - towlower(ws2);

	    /* An invalid sequence must not compare equal to a valid
	     * character whose value happens to be the same as that of
	     * the sequence's first byte. */
	    if (retval == 0 && bad_s1_mb != bad_s2_mb)
		retval = bad_s1_mb ? 1 : -1;

	    if (retval != 0)
		break;

	    s1 += s1_mb_len;
	    s2 += s2_mb_len;
	    n--;
	}

	/* If no difference was found and the limit wasn't reached, at
	 * least one of the strings has ended.  Compare the bytes we
	 * stopped at, so that a string that is a proper prefix of the
	 * other one sorts first instead of comparing as equal. */
	if (retval == 0 && n > 0)
	    retval = (unsigned char)*s1 - (unsigned char)*s2;

	free(s1_mb);
	free(s2_mb);

	return retval;
    } else
#endif
	return strncasecmp(s1, s2, n);
}

#ifndef HAVE_STRCASESTR
/* This function is equivalent to strcasestr().  It was adapted from
 * mutt's mutt_stristr() function. */
const char *nstrcasestr(const char *haystack, const char *needle)
{
    assert(haystack != NULL && needle != NULL);

    for (; *haystack != '\0'; haystack++) {
523
	const char *r = haystack, *q = needle;
524

525
	for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++)
526
527
528
529
530
531
532
533
534
535
	    ;

	if (*q == '\0')
	    return haystack;
    }

    return NULL;
}
#endif

536
537
538
539
/* Multibyte counterpart of strcasestr(): return a pointer to the first
 * occurrence of needle in haystack, comparing multibyte characters
 * without regard to case, or NULL when there is none.  In UTF-8 mode,
 * a sequence that cannot be converted is compared by the value of its
 * first byte, and only matches another such sequence. */
const char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	char *hay_mb = charalloc(MB_CUR_MAX);
	char *ndl_mb = charalloc(MB_CUR_MAX);
	const char *match = NULL;

	assert(haystack != NULL && needle != NULL);

	/* Try every character position of haystack in turn. */
	for (; *haystack != '\0'; haystack += move_mbright(haystack, 0)) {
	    const char *h = haystack, *p = needle;

	    /* Step through both strings until the needle is used up or
	     * a pair of characters differs. */
	    while (*p != '\0') {
		wchar_t wh, wp;
		bool h_invalid = FALSE, p_invalid = FALSE;
		const int h_len = parse_mbchar(h, hay_mb, NULL, NULL);
		int p_len;

		if (mbtowc(&wh, hay_mb, h_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wh = (unsigned char)*h;
		    h_invalid = TRUE;
		}

		p_len = parse_mbchar(p, ndl_mb, NULL, NULL);

		if (mbtowc(&wp, ndl_mb, p_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wp = (unsigned char)*p;
		    p_invalid = TRUE;
		}

		if (h_invalid != p_invalid ||
			towlower(wh) != towlower(wp))
		    break;

		h += h_len;
		p += p_len;
	    }

	    /* The whole needle matched at this position. */
	    if (*p == '\0') {
		match = haystack;
		break;
	    }
	}

	free(hay_mb);
	free(ndl_mb);

	return match;
    }
#endif

    return strcasestr(haystack, needle);
}

596
#if !defined(NANO_SMALL) || !defined(DISABLE_TABCOMP)
597
/* This function is equivalent to strstr(), except in that it scans the
598
 * string in reverse, starting at rev_start. */
599
600
601
602
603
604
605
606
const char *revstrstr(const char *haystack, const char *needle, const
	char *rev_start)
{
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    for (; rev_start >= haystack; rev_start--) {
	const char *r, *q;

607
	for (r = rev_start, q = needle; *r == *q && *q != '\0'; r++, q++)
608
609
610
611
612
613
614
615
	    ;

	if (*q == '\0')
	    return rev_start;
    }

    return NULL;
}
616
#endif /* !NANO_SMALL || !DISABLE_TABCOMP */
617

618
#ifndef NANO_SMALL
619
/* This function is equivalent to strcasestr(), except in that it scans
620
 * the string in reverse, starting at rev_start. */
621
622
623
624
625
626
627
628
const char *revstrcasestr(const char *haystack, const char *needle,
	const char *rev_start)
{
    assert(haystack != NULL && needle != NULL && rev_start != NULL);

    for (; rev_start >= haystack; rev_start--) {
	const char *r = rev_start, *q = needle;

629
	for (; tolower(*r) == tolower(*q) && *q != '\0'; r++, q++)
630
631
632
633
634
635
636
637
	    ;

	if (*q == '\0')
	    return rev_start;
    }

    return NULL;
}
638
639
640
641
642
643
644
645

/* Multibyte counterpart of strcasestr() that scans haystack in
 * reverse: try to match needle without regard to case at rev_start
 * first, and then at the start of each earlier multibyte character
 * down to the beginning of haystack.  Return a pointer to the first
 * position where needle matches, or NULL when it matches nowhere.  In
 * UTF-8 mode, a sequence that cannot be converted is compared by the
 * value of its first byte, and only matches another such sequence. */
const char *mbrevstrcasestr(const char *haystack, const char *needle,
	const char *rev_start)
{
#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	char *hay_mb = charalloc(MB_CUR_MAX);
	char *ndl_mb = charalloc(MB_CUR_MAX);
	const char *match = NULL;

	assert(haystack != NULL && needle != NULL && rev_start != NULL);

	for (;;) {
	    const char *h = rev_start, *p = needle;

	    /* Step through both strings until the needle is used up or
	     * a pair of characters differs. */
	    while (*p != '\0') {
		wchar_t wh, wp;
		bool h_invalid = FALSE, p_invalid = FALSE;
		const int h_len = parse_mbchar(h, hay_mb, NULL, NULL);
		int p_len;

		if (mbtowc(&wh, hay_mb, h_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wh = (unsigned char)*h;
		    h_invalid = TRUE;
		}

		p_len = parse_mbchar(p, ndl_mb, NULL, NULL);

		if (mbtowc(&wp, ndl_mb, p_len) <= 0) {
		    mbtowc(NULL, NULL, 0);
		    wp = (unsigned char)*p;
		    p_invalid = TRUE;
		}

		if (h_invalid != p_invalid ||
			towlower(wh) != towlower(wp))
		    break;

		h += h_len;
		p += p_len;
	    }

	    /* The whole needle matched at this position. */
	    if (*p == '\0') {
		match = rev_start;
		break;
	    }

	    /* The beginning of the line has been tried; give up. */
	    if (rev_start == haystack)
		break;

	    rev_start = haystack + move_mbleft(haystack, rev_start -
		haystack);
	}

	free(hay_mb);
	free(ndl_mb);

	return match;
    }
#endif

    return revstrcasestr(haystack, needle, rev_start);
}
705
#endif /* !NANO_SMALL */
706

707
708
709
710
711
712
/* Multibyte counterpart of strlen(): count the multibyte characters in
 * s by calling mbstrnlen() with the largest possible limit. */
size_t mbstrlen(const char *s)
{
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}

713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
#ifndef HAVE_STRNLEN
/* This function is equivalent to strnlen(). */
size_t nstrnlen(const char *s, size_t maxlen)
{
    size_t n = 0;

    assert(s != NULL);

    for (; maxlen > 0 && *s != '\0'; maxlen--, n++, s++)
	;

    return n;
}
#endif

/* Multibyte counterpart of strnlen(): return the number of multibyte
 * characters in s before the terminating null, but never more than
 * maxlen. */
size_t mbstrnlen(const char *s, size_t maxlen)
{
    assert(s != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	size_t count;

	for (count = 0; *s != '\0'; count++, maxlen--) {
	    const int step = parse_mbchar(s, NULL, NULL, NULL);

	    /* The limit has been reached; don't count this one. */
	    if (maxlen == 0)
		break;

	    s += step;
	}

	return count;
    }
#endif

    return strnlen(s, maxlen);
}
754
755

#ifndef DISABLE_JUSTIFY
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
#ifdef ENABLE_NANORC
/* Return TRUE if the string s contains one or more blank characters,
 * and FALSE otherwise. */
bool has_blank_chars(const char *s)
{
    assert(s != NULL);

    /* Stop at the first blank character, or at the end of the string.
     * isblank() is only defined for values representable as unsigned
     * char (and EOF), so convert before calling it; a plain char with
     * its high bit set could be negative. */
    while (*s != '\0' && !isblank((unsigned char)*s))
	s++;

    return (*s != '\0');
}

/* Return TRUE if the multibyte string s contains one or more blank
 * multibyte characters, and FALSE otherwise.  Outside UTF-8 mode, the
 * work is left to has_blank_chars(). */
bool has_blank_mbchars(const char *s)
{
    assert(s != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8)) {
	char *cur_mb = charalloc(MB_CUR_MAX);
	bool found = FALSE;

	/* Look at one multibyte character at a time until a blank one
	 * turns up or the string ends. */
	while (!found && *s != '\0') {
	    const int step = parse_mbchar(s, cur_mb, NULL, NULL);

	    if (is_blank_mbchar(cur_mb))
		found = TRUE;
	    else
		s += step;
	}

	free(cur_mb);

	return found;
    }
#endif

    return has_blank_chars(s);
}
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
802
#endif /* ENABLE_NANORC */
803

804
805
806
807
808
809
/* This function is equivalent to strchr() for multibyte strings. */
char *mbstrchr(const char *s, char *c)
{
    assert(s != NULL && c != NULL);

#ifdef NANO_WIDE
810
    if (ISSET(USE_UTF8)) {
811
812
813
	char *s_mb = charalloc(MB_CUR_MAX);
	const char *q = s;
	wchar_t ws, wc;
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
814
	int c_mb_len = mbtowc(&wc, c, MB_CUR_MAX);
815
816
817
818
819
820
821

	if (c_mb_len <= 0) {
	    mbtowc(NULL, NULL, 0);
	    wc = (unsigned char)*c;
	}

	while (*s != '\0') {
David Lawrence Ramsey's avatar
David Lawrence Ramsey committed
822
	    int s_mb_len = parse_mbchar(s, s_mb, NULL, NULL);
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845

	    if (mbtowc(&ws, s_mb, s_mb_len) <= 0) {
		mbtowc(NULL, NULL, 0);
		ws = (unsigned char)*s;
	    }

	    if (ws == wc)
		break;

	    s += s_mb_len;
	    q += s_mb_len;
	}

	free(s_mb);

	if (ws != wc)
	    q = NULL;

	return (char *)q;
    } else
#endif
	return strchr(s, *c);
}
846
#endif /* !DISABLE_JUSTIFY */
847
848
849
850
851
852
853
854
855
856

#ifdef ENABLE_NANORC
/* Check whether the string s is a valid multibyte string.  In UTF-8
 * mode, return TRUE if mbstowcs() can convert all of s, and FALSE
 * otherwise.  Outside UTF-8 mode, every string is valid. */
bool is_valid_mbstring(const char *s)
{
    assert(s != NULL);

#ifdef NANO_WIDE
    if (ISSET(USE_UTF8))
	return (mbstowcs(NULL, s, 0) != (size_t)-1);
#endif

    return TRUE;
}
#endif /* ENABLE_NANORC */