/**************************************************************************
 *   chars.c                                                              *
 *                                                                        *
 *   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,  *
 *   2010, 2011, 2013, 2014 Free Software Foundation, Inc.                *
 *   This program is free software; you can redistribute it and/or modify *
 *   it under the terms of the GNU General Public License as published by *
 *   the Free Software Foundation; either version 3, or (at your option)  *
 *   any later version.                                                   *
 *                                                                        *
 *   This program is distributed in the hope that it will be useful, but  *
 *   WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    *
 *   General Public License for more details.                             *
 *                                                                        *
 *   You should have received a copy of the GNU General Public License    *
 *   along with this program; if not, write to the Free Software          *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA            *
 *   02110-1301, USA.                                                     *
 *                                                                        *
 **************************************************************************/
|
|
|
|
|
2005-12-08 02:47:10 +00:00
|
|
|
#include "proto.h"
|
2005-01-12 03:25:57 +00:00
|
|
|
|
2005-01-14 04:22:14 +00:00
|
|
|
#include <string.h>
|
2005-01-12 03:25:57 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
|
2005-07-17 02:40:07 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2005-01-22 18:24:16 +00:00
|
|
|
#ifdef HAVE_WCHAR_H
|
2005-01-12 03:25:57 +00:00
|
|
|
#include <wchar.h>
|
|
|
|
#endif
|
2005-01-22 18:24:16 +00:00
|
|
|
#ifdef HAVE_WCTYPE_H
|
2005-01-12 03:25:57 +00:00
|
|
|
#include <wctype.h>
|
|
|
|
#endif
|
2005-07-21 22:12:03 +00:00
|
|
|
|
2006-04-12 15:27:40 +00:00
|
|
|
/* Whether we've enabled UTF-8 support.  Starts out FALSE and is switched
 * on by utf8_init(). */
static bool use_utf8 = FALSE;
|
|
|
|
|
|
|
|
/* Enable UTF-8 support. */
|
|
|
|
void utf8_init(void)
|
|
|
|
{
|
|
|
|
use_utf8 = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Is UTF-8 support enabled? */
|
|
|
|
bool using_utf8(void)
|
|
|
|
{
|
|
|
|
return use_utf8;
|
|
|
|
}
|
2014-04-05 20:28:29 +00:00
|
|
|
#endif /* ENABLE_UTF8 */
|
2005-01-12 03:25:57 +00:00
|
|
|
|
2016-05-24 08:34:40 +00:00
|
|
|
/* Concatenate two allocated strings, and free the second.
 *
 * str1 is resized (through charealloc()) to len1 + len2 + 1 bytes, cut
 * off at offset len1, and then at most len2 bytes of str2 are appended.
 * The result is always NUL-terminated.  str2 is freed, so the caller must
 * not use it afterwards; the returned pointer replaces str1. */
char *addstrings(char *str1, size_t len1, char *str2, size_t len2)
{
    str1 = charealloc(str1, len1 + len2 + 1);

    /* Terminate at len1 so that strncat() starts appending right there. */
    str1[len1] = '\0';
    strncat(&str1[len1], str2, len2);

    free(str2);

    return str1;
}
|
|
|
|
|
2005-06-12 17:48:46 +00:00
|
|
|
#ifndef HAVE_ISBLANK
|
|
|
|
/* This function is equivalent to isblank(): c counts as blank when it is
 * whitespace according to isspace() and is either a tab or not a control
 * character (which excludes newline, carriage return and the like). */
bool nisblank(int c)
{
    return isspace(c) && (c == '\t' || !is_cntrl_char(c));
}
|
2005-03-16 16:32:33 +00:00
|
|
|
#endif
|
2005-03-16 15:34:22 +00:00
|
|
|
|
2005-07-17 02:40:07 +00:00
|
|
|
#if !defined(HAVE_ISWBLANK) && defined(ENABLE_UTF8)
|
2005-06-12 17:48:46 +00:00
|
|
|
/* This function is equivalent to iswblank(): wc counts as blank when it
 * is whitespace according to iswspace() and is either a tab or not a
 * control character as judged by is_cntrl_wchar(). */
bool niswblank(wchar_t wc)
{
    return iswspace(wc) && (wc == '\t' || !is_cntrl_wchar(wc));
}
|
2005-06-12 17:48:46 +00:00
|
|
|
#endif
|
2005-01-12 03:25:57 +00:00
|
|
|
|
2016-05-24 08:34:40 +00:00
|
|
|
/* Return TRUE if the value of c is in byte range, and FALSE otherwise.
 * The value is in range exactly when truncating it to an unsigned char
 * loses nothing, so negative values and values above UCHAR_MAX fail. */
bool is_byte(int c)
{
    return ((unsigned int)c == (unsigned char)c);
}
|
|
|
|
|
2015-09-04 19:34:55 +00:00
|
|
|
/* Put mbtowc() back into its initial conversion state by calling it with
 * a null string pointer; the return value of that call is ignored. */
void mbtowc_reset(void)
{
    IGNORE_CALL_RESULT(mbtowc(NULL, NULL, 0));
}
|
|
|
|
|
|
|
|
/* Put wctomb() back into its initial conversion state by calling it with
 * a null string pointer; the return value of that call is ignored. */
void wctomb_reset(void)
{
    IGNORE_CALL_RESULT(wctomb(NULL, 0));
}
|
|
|
|
|
2016-08-02 20:09:22 +00:00
|
|
|
/* This function is equivalent to isalpha() for multibyte characters.
 * In UTF-8 mode the character at c is decoded with mbtowc() and tested
 * with iswalpha(); a sequence that cannot be decoded is never alphabetic.
 * Otherwise only the first byte of c is tested, with isalpha(). */
bool is_alpha_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            /* Invalid sequence: reset the decoder before bailing out. */
            mbtowc_reset();
            return FALSE;
        }

        return iswalpha(wc);
    } else
#endif
        return isalpha((unsigned char)*c);
}
|
|
|
|
|
2005-06-15 06:04:08 +00:00
|
|
|
/* This function is equivalent to isalnum() for multibyte characters.
 * In UTF-8 mode the character at c is decoded with mbtowc() and tested
 * with iswalnum(); a sequence that cannot be decoded is never
 * alphanumeric.  Otherwise only the first byte of c is tested, with
 * isalnum(). */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            /* Invalid sequence: reset the decoder before bailing out. */
            mbtowc_reset();
            return FALSE;
        }

        return iswalnum(wc);
    } else
#endif
        return isalnum((unsigned char)*c);
}
|
|
|
|
|
2005-01-12 03:25:57 +00:00
|
|
|
/* This function is equivalent to isblank() for multibyte characters.
 * In UTF-8 mode the character at c is decoded with mbtowc() and tested
 * with iswblank(); a sequence that cannot be decoded is never blank.
 * Otherwise only the first byte of c is tested, with isblank(). */
bool is_blank_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            /* Invalid sequence: reset the decoder before bailing out. */
            mbtowc_reset();
            return FALSE;
        }

        return iswblank(wc);
    } else
#endif
        return isblank((unsigned char)*c);
}
|
|
|
|
|
2006-05-24 17:36:00 +00:00
|
|
|
/* This function is equivalent to iscntrl(), except in that it only
 * handles non-high-bit control characters: it is true exactly for the
 * values 0 through 31. */
bool is_ascii_cntrl_char(int c)
{
    return (0 <= c && c < 32);
}
|
|
|
|
|
2005-01-12 03:25:57 +00:00
|
|
|
/* This function is equivalent to iscntrl(), except in that it also
 * handles high-bit control characters.  A value qualifies when bits 5
 * and 6 (mask 0x60) are both clear -- within byte range that means 0..31
 * and 128..159 -- or when it is 127 (DEL). */
bool is_cntrl_char(int c)
{
    return ((c & 0x60) == 0 || c == 127);
}
|
|
|
|
|
|
|
|
/* This function is equivalent to iscntrl() for multibyte characters,
|
|
|
|
* except in that it also handles multibyte control characters with
|
|
|
|
* their high bits set. */
|
|
|
|
bool is_cntrl_mbchar(const char *c)
|
|
|
|
{
|
|
|
|
assert(c != NULL);
|
|
|
|
|
2005-07-17 02:40:07 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2006-04-12 15:27:40 +00:00
|
|
|
if (use_utf8) {
|
2016-06-29 18:48:04 +00:00
|
|
|
return ((c[0] & 0xE0) == 0 || c[0] == 127 ||
|
|
|
|
((signed char)c[0] == -62 && (signed char)c[1] < -96));
|
2005-01-12 03:25:57 +00:00
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
return is_cntrl_char((unsigned char)*c);
|
|
|
|
}
|
|
|
|
|
2005-06-15 06:04:08 +00:00
|
|
|
/* This function is equivalent to ispunct() for multibyte characters.
 * In UTF-8 mode the character at c is decoded with mbtowc() and tested
 * with iswpunct(); a sequence that cannot be decoded is never
 * punctuation.  Otherwise only the first byte of c is tested, with
 * ispunct(). */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            /* Invalid sequence: reset the decoder before bailing out. */
            mbtowc_reset();
            return FALSE;
        }

        return iswpunct(wc);
    } else
#endif
        return ispunct((unsigned char)*c);
}
|
|
|
|
|
2016-06-30 16:02:45 +00:00
|
|
|
/* Return TRUE when the given multibyte character c is a word-forming
|
|
|
|
* character (that is: alphanumeric, or specified in wordchars, or
|
|
|
|
* punctuation when allow_punct is TRUE), and FALSE otherwise. */
|
2005-06-15 06:04:08 +00:00
|
|
|
bool is_word_mbchar(const char *c, bool allow_punct)
|
|
|
|
{
|
|
|
|
assert(c != NULL);
|
|
|
|
|
2016-07-21 07:46:47 +00:00
|
|
|
if (*c == '\0')
|
|
|
|
return FALSE;
|
|
|
|
|
2016-06-30 16:02:45 +00:00
|
|
|
if (is_alnum_mbchar(c))
|
|
|
|
return TRUE;
|
|
|
|
|
|
|
|
if (word_chars != NULL && *word_chars != '\0') {
|
2016-07-22 13:30:09 +00:00
|
|
|
bool wordforming;
|
2016-06-30 16:02:45 +00:00
|
|
|
char *symbol = charalloc(MB_CUR_MAX + 1);
|
|
|
|
int symlen = parse_mbchar(c, symbol, NULL);
|
|
|
|
|
|
|
|
symbol[symlen] = '\0';
|
2016-07-22 13:30:09 +00:00
|
|
|
wordforming = (strstr(word_chars, symbol) != NULL);
|
|
|
|
free(symbol);
|
2016-06-30 16:02:45 +00:00
|
|
|
|
2016-07-22 13:30:09 +00:00
|
|
|
return wordforming;
|
2016-06-30 16:02:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return (allow_punct && is_punct_mbchar(c));
|
2005-06-13 02:40:04 +00:00
|
|
|
}
|
|
|
|
|
2016-06-29 18:40:22 +00:00
|
|
|
/* Return the visible representation of control character c. */
|
2016-05-30 09:28:16 +00:00
|
|
|
char control_rep(const signed char c)
|
2005-01-12 03:25:57 +00:00
|
|
|
{
|
2005-08-04 20:24:26 +00:00
|
|
|
assert(is_cntrl_char(c));
|
|
|
|
|
2016-06-29 18:40:22 +00:00
|
|
|
/* An embedded newline is an encoded null. */
|
2005-01-12 03:25:57 +00:00
|
|
|
if (c == '\n')
|
|
|
|
return '@';
|
2016-08-01 10:56:05 +00:00
|
|
|
else if (c == DEL_CODE)
|
2005-01-12 03:25:57 +00:00
|
|
|
return '?';
|
2016-05-30 09:28:16 +00:00
|
|
|
else if (c == -97)
|
|
|
|
return '=';
|
|
|
|
else if (c < 0)
|
|
|
|
return c + 224;
|
2005-01-12 03:25:57 +00:00
|
|
|
else
|
|
|
|
return c + 64;
|
|
|
|
}
|
|
|
|
|
2016-06-29 18:37:28 +00:00
|
|
|
/* Return the visible representation of multibyte control character c. */
|
|
|
|
char control_mbrep(const char *c)
|
2005-01-12 03:25:57 +00:00
|
|
|
{
|
2016-06-29 18:37:28 +00:00
|
|
|
assert(c != NULL);
|
2005-01-12 03:25:57 +00:00
|
|
|
|
2005-07-17 02:40:07 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2006-04-12 15:27:40 +00:00
|
|
|
if (use_utf8) {
|
2016-05-30 09:28:16 +00:00
|
|
|
if (0 <= c[0] && c[0] <= 127)
|
2016-06-29 18:37:28 +00:00
|
|
|
return control_rep(c[0]);
|
2016-05-30 09:28:16 +00:00
|
|
|
else
|
2016-06-29 18:37:28 +00:00
|
|
|
return control_rep(c[1]);
|
2016-05-24 08:34:40 +00:00
|
|
|
} else
|
2005-01-12 03:25:57 +00:00
|
|
|
#endif
|
2016-06-29 18:37:28 +00:00
|
|
|
return control_rep(*c);
|
2005-01-12 03:25:57 +00:00
|
|
|
}
|
|
|
|
|
2016-06-06 10:48:26 +00:00
|
|
|
/* Assess how many bytes the given (multibyte) character occupies.  Return -1
 * if the byte sequence is invalid, and return the number of bytes minus 8
 * when it encodes an invalid codepoint.  Also, in the second parameter,
 * return the number of columns that the character occupies.
 *
 * Note that *width is written only for a valid codepoint in UTF-8 mode.
 * On the two error returns, and outside UTF-8 mode (where the result is
 * always 1), *width is left untouched. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;
        int charlen = mbtowc(&wc, c, MB_CUR_MAX);

        /* If the sequence is invalid... */
        if (charlen < 0) {
            mbtowc_reset();
            return -1;
        }

        /* If the codepoint is invalid... */
        if (!is_valid_unicode(wc))
            return charlen - 8;
        else {
            *width = wcwidth(wc);
            /* If the codepoint is unassigned, assume a width of one. */
            if (*width < 0)
                *width = 1;
            return charlen;
        }
    } else
#endif
        return 1;
}
|
|
|
|
|
2005-01-12 03:25:57 +00:00
|
|
|
/* This function is equivalent to wcwidth() for multibyte characters,
 * except that it never reports failure: an undecodable sequence and a
 * character for which wcwidth() returns -1 both count as one column.
 * Outside UTF-8 mode every character is one column wide. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc;
        int width;

        if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
            /* Invalid sequence: reset the decoder, assume one column. */
            mbtowc_reset();
            return 1;
        }

        width = wcwidth(wc);

        if (width == -1)
            return 1;

        return width;
    } else
#endif
        return 1;
}
|
|
|
|
|
|
|
|
/* Return the maximum width in bytes of a multibyte character: MB_CUR_MAX
 * when UTF-8 support is enabled, and 1 otherwise. */
int mb_cur_max(void)
{
#ifdef ENABLE_UTF8
    if (use_utf8)
        return MB_CUR_MAX;
#endif
    return 1;
}
|
|
|
|
|
2005-08-08 23:03:25 +00:00
|
|
|
/* Convert the Unicode value in chr to a multibyte character with the
 * same wide character value as chr, if possible.  If the conversion
 * succeeds, return the (dynamically allocated) multibyte character and
 * its length.  Otherwise, return an undefined (dynamically allocated)
 * multibyte character and a length of zero.
 *
 * The length is stored in *chr_mb_len.  The returned buffer is not
 * NUL-terminated here, and the caller is responsible for freeing it.
 * Outside UTF-8 mode the result is the single low-order byte of chr. */
char *make_mbchar(long chr, int *chr_mb_len)
{
    char *chr_mb;

    assert(chr_mb_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        chr_mb = charalloc(MB_CUR_MAX);
        *chr_mb_len = wctomb(chr_mb, (wchar_t)chr);

        /* Reject invalid Unicode characters. */
        if (*chr_mb_len < 0 || !is_valid_unicode((wchar_t)chr)) {
            wctomb_reset();
            *chr_mb_len = 0;
        }
    } else
#endif
    {
        /* Convert by value instead of reading the first byte of chr's
         * storage, which is the high-order byte on big-endian hosts. */
        char byte = (char)chr;

        *chr_mb_len = 1;
        chr_mb = mallocstrncpy(NULL, &byte, 1);
    }

    return chr_mb;
}
|
|
|
|
|
|
|
|
/* Parse a multibyte character from buf. Return the number of bytes
|
|
|
|
* used. If chr isn't NULL, store the multibyte character in it. If
|
2005-07-26 06:13:45 +00:00
|
|
|
* col isn't NULL, store the new display width in it. If *buf is '\t',
|
|
|
|
* we expect col to have the current display width. */
|
|
|
|
int parse_mbchar(const char *buf, char *chr, size_t *col)
|
2005-01-12 03:25:57 +00:00
|
|
|
{
|
|
|
|
int buf_mb_len;
|
|
|
|
|
|
|
|
assert(buf != NULL);
|
|
|
|
|
2005-07-17 02:40:07 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2006-04-12 15:27:40 +00:00
|
|
|
if (use_utf8) {
|
2005-01-12 03:25:57 +00:00
|
|
|
/* Get the number of bytes in the multibyte character. */
|
|
|
|
buf_mb_len = mblen(buf, MB_CUR_MAX);
|
|
|
|
|
2016-05-29 19:45:52 +00:00
|
|
|
/* When the multibyte sequence is invalid, only take the first byte. */
|
2005-07-21 22:12:03 +00:00
|
|
|
if (buf_mb_len < 0) {
|
2009-12-02 03:24:18 +00:00
|
|
|
IGNORE_CALL_RESULT(mblen(NULL, 0));
|
2005-02-11 20:09:11 +00:00
|
|
|
buf_mb_len = 1;
|
2005-07-21 22:12:03 +00:00
|
|
|
} else if (buf_mb_len == 0)
|
|
|
|
buf_mb_len++;
|
2005-01-12 03:25:57 +00:00
|
|
|
|
2016-05-29 19:45:52 +00:00
|
|
|
/* When requested, store the multibyte character in chr. */
|
2005-01-12 03:25:57 +00:00
|
|
|
if (chr != NULL) {
|
|
|
|
int i;
|
2005-03-11 04:03:32 +00:00
|
|
|
|
2005-01-12 03:25:57 +00:00
|
|
|
for (i = 0; i < buf_mb_len; i++)
|
|
|
|
chr[i] = buf[i];
|
|
|
|
}
|
|
|
|
|
2016-05-29 19:45:52 +00:00
|
|
|
/* When requested, store the width of the wide character in col. */
|
2005-01-12 03:25:57 +00:00
|
|
|
if (col != NULL) {
|
|
|
|
/* If we have a tab, get its width in columns using the
|
|
|
|
* current value of col. */
|
|
|
|
if (*buf == '\t')
|
|
|
|
*col += tabsize - *col % tabsize;
|
2016-05-28 13:56:16 +00:00
|
|
|
/* If we have a control character, it's two columns wide: one
|
|
|
|
* column for the "^", and one for the visible character. */
|
2005-01-12 03:25:57 +00:00
|
|
|
else if (is_cntrl_mbchar(buf)) {
|
2016-05-28 13:56:16 +00:00
|
|
|
*col += 2;
|
2005-01-12 03:25:57 +00:00
|
|
|
/* If we have a normal character, get its width in columns
|
|
|
|
* normally. */
|
|
|
|
} else
|
|
|
|
*col += mbwidth(buf);
|
|
|
|
}
|
2016-05-24 08:34:40 +00:00
|
|
|
} else
|
2005-01-12 03:25:57 +00:00
|
|
|
#endif
|
2016-05-24 08:34:40 +00:00
|
|
|
{
|
2016-05-29 19:45:52 +00:00
|
|
|
/* A byte character is one byte long. */
|
2005-01-12 03:25:57 +00:00
|
|
|
buf_mb_len = 1;
|
|
|
|
|
2016-05-29 19:45:52 +00:00
|
|
|
/* When requested, store the byte character in chr. */
|
2005-01-12 03:25:57 +00:00
|
|
|
if (chr != NULL)
|
|
|
|
*chr = *buf;
|
|
|
|
|
2016-05-29 19:45:52 +00:00
|
|
|
/* When requested, store the width of the wide character in col. */
|
2005-01-12 03:25:57 +00:00
|
|
|
if (col != NULL) {
|
|
|
|
/* If we have a tab, get its width in columns using the
|
|
|
|
* current value of col. */
|
|
|
|
if (*buf == '\t')
|
|
|
|
*col += tabsize - *col % tabsize;
|
2016-05-28 13:40:39 +00:00
|
|
|
/* If we have a control character, it's two columns wide: one
|
|
|
|
* column for the "^", and one for the visible character. */
|
2005-01-12 03:25:57 +00:00
|
|
|
else if (is_cntrl_char((unsigned char)*buf))
|
|
|
|
*col += 2;
|
|
|
|
/* If we have a normal character, it's one column wide. */
|
|
|
|
else
|
|
|
|
(*col)++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return buf_mb_len;
|
|
|
|
}
|
2005-01-14 21:50:32 +00:00
|
|
|
|
|
|
|
/* Return the index in buf of the beginning of the multibyte character
 * before the one at pos.  When pos is zero, zero is returned. */
size_t move_mbleft(const char *buf, size_t pos)
{
    const size_t max_len = (size_t)mb_cur_max();
    size_t before, char_len = 0;

    assert(buf != NULL && pos <= strlen(buf));

    /* There is no library function to move backward one multibyte
     * character.  So we just start groping for one at the farthest
     * possible point. */
    if (max_len > pos)
        before = 0;
    else
        before = pos - max_len;

    /* Step forward character by character until reaching (or passing)
     * pos, remembering the length of the last character stepped over. */
    while (before < pos) {
        char_len = parse_mbchar(buf + before, NULL, NULL);
        before += char_len;
    }

    return before - char_len;
}
|
|
|
|
|
|
|
|
/* Return the index in buf of the beginning of the multibyte character
 * after the one at pos: pos plus the byte length that parse_mbchar()
 * reports for the character at pos. */
size_t move_mbright(const char *buf, size_t pos)
{
    return pos + parse_mbchar(buf + pos, NULL, NULL);
}
|
2005-01-16 18:49:19 +00:00
|
|
|
|
|
|
|
#ifndef HAVE_STRCASECMP
|
|
|
|
/* This function is equivalent to strcasecmp(). */
|
|
|
|
int nstrcasecmp(const char *s1, const char *s2)
|
|
|
|
{
|
2016-06-01 19:56:38 +00:00
|
|
|
return strncasecmp(s1, s2, HIGHEST_POSITIVE);
|
2005-01-16 18:49:19 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* This function is equivalent to strcasecmp() for multibyte strings. */
|
|
|
|
int mbstrcasecmp(const char *s1, const char *s2)
|
|
|
|
{
|
2016-06-01 19:56:38 +00:00
|
|
|
return mbstrncasecmp(s1, s2, HIGHEST_POSITIVE);
|
2005-01-16 18:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifndef HAVE_STRNCASECMP
|
|
|
|
/* This function is equivalent to strncasecmp(). */
|
|
|
|
int nstrncasecmp(const char *s1, const char *s2, size_t n)
|
|
|
|
{
|
2007-07-01 21:46:00 +00:00
|
|
|
if (s1 == s2)
|
|
|
|
return 0;
|
|
|
|
|
2005-01-16 18:49:19 +00:00
|
|
|
assert(s1 != NULL && s2 != NULL);
|
|
|
|
|
2007-07-01 21:17:05 +00:00
|
|
|
for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1++, s2++, n--) {
|
2005-01-16 18:49:19 +00:00
|
|
|
if (tolower(*s1) != tolower(*s2))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2006-10-08 15:21:23 +00:00
|
|
|
return (n > 0) ? tolower(*s1) - tolower(*s2) : 0;
|
2005-01-16 18:49:19 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2016-05-24 08:34:40 +00:00
|
|
|
/* This function is equivalent to strncasecmp() for multibyte strings:
 * in UTF-8 mode n counts characters rather than bytes.  Outside UTF-8
 * mode the call is passed straight to strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        wchar_t wc1, wc2;

        assert(s1 != NULL && s2 != NULL);

        while (*s1 != '\0' && *s2 != '\0' && n > 0) {
            bool bad1 = FALSE, bad2 = FALSE;
            int difference;

            if (mbtowc(&wc1, s1, MB_CUR_MAX) < 0) {
                mbtowc_reset();
                bad1 = TRUE;
            }

            if (mbtowc(&wc2, s2, MB_CUR_MAX) < 0) {
                mbtowc_reset();
                bad2 = TRUE;
            }

            /* When either side is undecodable, compare the raw bytes.
             * If those are equal and only one side is invalid, the
             * invalid one sorts after the valid one.  If both are
             * invalid and equal, step one byte in each string. */
            if (bad1 || bad2) {
                if (*s1 != *s2)
                    return (unsigned char)*s1 - (unsigned char)*s2;

                if (bad1 != bad2)
                    return (bad1 ? 1 : -1);

                s1++; s2++; n--;
                continue;
            }

            difference = towlower(wc1) - towlower(wc2);

            if (difference != 0)
                return difference;

            /* Both characters match: advance each string by the byte
             * length of its own character. */
            s1 += move_mbright(s1, 0);
            s2 += move_mbright(s2, 0);
            n--;
        }

        /* Either a string ended or n ran out; in the latter case all
         * that had to be compared was equal. */
        return (n > 0) ? ((unsigned char)*s1 - (unsigned char)*s2) : 0;
    } else
#endif
        return strncasecmp(s1, s2, n);
}
|
|
|
|
|
|
|
|
#ifndef HAVE_STRCASESTR
|
fix copyright years on source files; all functions in browser.c were
originally added in 2001; the oldest function in color.c is
do_colorinit() (now color_init()), which was originally added in 2001;
the oldest function in chars.c is revstrstr(), which was originally
added in 2001; the oldest function in help.c is do_help(), which was
originally added in 2000; the oldest function in prompt.c is statusq()
(now do_prompt()), which was originally added before 0.6.6, which was
apparently in 1999; all functions in rcfile.c were originally added in
2001; one of the oldest functions in search.c is do_search(), which was
originally added in 0.2.7, which was apparently in 1999; and one of the
oldest functions in text.c is do_wrap(), which was originally added in
0.3.1, which was apparently in 1999; also, for functions originally
adapted from other sources, add notices from the original files, as we
do with the tab completion functions adapted from busybox
git-svn-id: svn://svn.savannah.gnu.org/nano/trunk/nano@3172 35c25a1d-7b9e-4130-9fde-d3aeb78583b8
2005-11-14 22:20:35 +00:00
|
|
|
/* This function is equivalent to strcasestr(). */
|
2007-07-06 13:44:13 +00:00
|
|
|
char *nstrcasestr(const char *haystack, const char *needle)
|
2005-01-16 18:49:19 +00:00
|
|
|
{
|
2007-07-10 22:54:58 +00:00
|
|
|
size_t haystack_len, needle_len;
|
|
|
|
|
2005-01-16 18:49:19 +00:00
|
|
|
assert(haystack != NULL && needle != NULL);
|
|
|
|
|
2007-07-09 22:57:07 +00:00
|
|
|
if (*needle == '\0')
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)haystack;
|
2007-07-02 15:45:13 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
haystack_len = strlen(haystack);
|
|
|
|
needle_len = strlen(needle);
|
2005-01-16 18:49:19 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
for (; *haystack != '\0' && haystack_len >= needle_len; haystack++,
|
|
|
|
haystack_len--) {
|
|
|
|
if (strncasecmp(haystack, needle, needle_len) == 0)
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)haystack;
|
2005-01-16 18:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-01-22 18:24:16 +00:00
|
|
|
/* This function is equivalent to strcasestr() for multibyte strings.
 * In UTF-8 mode the haystack is walked one multibyte character at a
 * time, lengths are counted in characters, and a match is accepted only
 * where mblen() reports a valid character at the match position.
 * Outside UTF-8 mode the call is passed to strcasestr(). */
char *mbstrcasestr(const char *haystack, const char *needle)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        size_t haystack_len, needle_len;

        assert(haystack != NULL && needle != NULL);

        if (*needle == '\0')
            return (char *)haystack;

        haystack_len = mbstrlen(haystack);
        needle_len = mbstrlen(needle);

        for (; *haystack != '\0' && haystack_len >= needle_len;
                haystack += move_mbright(haystack, 0), haystack_len--) {
            if (mbstrncasecmp(haystack, needle, needle_len) == 0 &&
                        mblen(haystack, MB_CUR_MAX) > 0)
                return (char *)haystack;
        }

        return NULL;
    } else
#endif
        return (char *) strcasestr(haystack, needle);
}
|
|
|
|
|
2005-11-15 03:17:35 +00:00
|
|
|
#if !defined(NANO_TINY) || !defined(DISABLE_TABCOMP)
|
2005-01-18 17:00:00 +00:00
|
|
|
/* This function is equivalent to strstr(), except in that it scans the
|
2005-01-22 20:49:14 +00:00
|
|
|
* string in reverse, starting at rev_start. */
|
2007-07-06 13:44:13 +00:00
|
|
|
char *revstrstr(const char *haystack, const char *needle, const char
|
|
|
|
*rev_start)
|
2005-01-18 17:00:00 +00:00
|
|
|
{
|
2007-07-10 22:54:58 +00:00
|
|
|
size_t rev_start_len, needle_len;
|
|
|
|
|
2005-01-18 17:00:00 +00:00
|
|
|
assert(haystack != NULL && needle != NULL && rev_start != NULL);
|
|
|
|
|
2007-07-09 22:57:07 +00:00
|
|
|
if (*needle == '\0')
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)rev_start;
|
2007-07-02 15:45:13 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
needle_len = strlen(needle);
|
2005-01-18 17:00:00 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
if (strlen(haystack) < needle_len)
|
|
|
|
return NULL;
|
2005-01-18 17:00:00 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
rev_start_len = strlen(rev_start);
|
|
|
|
|
|
|
|
for (; rev_start >= haystack; rev_start--, rev_start_len++) {
|
|
|
|
if (rev_start_len >= needle_len && strncmp(rev_start, needle,
|
|
|
|
needle_len) == 0)
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)rev_start;
|
2005-01-18 17:00:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-11-15 03:17:35 +00:00
|
|
|
#endif /* !NANO_TINY || !DISABLE_TABCOMP */
|
2005-01-18 17:00:00 +00:00
|
|
|
|
2005-11-15 03:17:35 +00:00
|
|
|
#ifndef NANO_TINY
|
2005-01-18 17:00:00 +00:00
|
|
|
/* This function is equivalent to strcasestr(), except in that it scans
|
2005-01-22 20:49:14 +00:00
|
|
|
* the string in reverse, starting at rev_start. */
|
2007-07-06 13:44:13 +00:00
|
|
|
char *revstrcasestr(const char *haystack, const char *needle, const char
|
|
|
|
*rev_start)
|
2005-01-16 18:49:19 +00:00
|
|
|
{
|
2007-07-10 22:54:58 +00:00
|
|
|
size_t rev_start_len, needle_len;
|
|
|
|
|
2005-01-16 18:49:19 +00:00
|
|
|
assert(haystack != NULL && needle != NULL && rev_start != NULL);
|
|
|
|
|
2007-07-09 22:57:07 +00:00
|
|
|
if (*needle == '\0')
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)rev_start;
|
2007-07-02 15:45:13 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
needle_len = strlen(needle);
|
|
|
|
|
|
|
|
if (strlen(haystack) < needle_len)
|
|
|
|
return NULL;
|
2005-01-16 18:49:19 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
rev_start_len = strlen(rev_start);
|
2005-01-16 18:49:19 +00:00
|
|
|
|
2007-07-10 22:54:58 +00:00
|
|
|
for (; rev_start >= haystack; rev_start--, rev_start_len++) {
|
|
|
|
if (rev_start_len >= needle_len && strncasecmp(rev_start,
|
|
|
|
needle, needle_len) == 0)
|
2007-07-06 13:44:13 +00:00
|
|
|
return (char *)rev_start;
|
2005-01-16 18:49:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-01-24 01:14:17 +00:00
|
|
|
|
|
|
|
/* This function is equivalent to strcasestr() for multibyte strings,
 * except in that it scans the string in reverse, starting at rev_start.
 * In UTF-8 mode lengths are counted in characters, the scan moves back
 * one multibyte character at a time, and a match is accepted only where
 * mblen() reports a valid character.  Outside UTF-8 mode the call is
 * passed to revstrcasestr(). */
char *mbrevstrcasestr(const char *haystack, const char *needle, const
        char *rev_start)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
        size_t rev_start_len, needle_len;

        assert(haystack != NULL && needle != NULL && rev_start != NULL);

        if (*needle == '\0')
            return (char *)rev_start;

        needle_len = mbstrlen(needle);

        if (mbstrlen(haystack) < needle_len)
            return NULL;

        /* The number of characters from the current position to the
         * end of the string. */
        rev_start_len = mbstrlen(rev_start);

        while (TRUE) {
            if (rev_start_len >= needle_len &&
                        mbstrncasecmp(rev_start, needle, needle_len) == 0 &&
                        mblen(rev_start, MB_CUR_MAX) > 0)
                return (char *)rev_start;

            /* If we've reached the head of the haystack, we found nothing. */
            if (rev_start == haystack)
                return NULL;

            rev_start = haystack + move_mbleft(haystack, rev_start - haystack);
            rev_start_len++;
        }
    } else
#endif
        return revstrcasestr(haystack, needle, rev_start);
}
|
2005-11-15 03:17:35 +00:00
|
|
|
#endif /* !NANO_TINY */
|
2005-01-16 18:49:19 +00:00
|
|
|
|
2005-01-25 19:21:11 +00:00
|
|
|
/* This function is equivalent to strlen() for multibyte strings.  It is
 * mbstrnlen() called with the largest limit a size_t can express. */
size_t mbstrlen(const char *s)
{
    const size_t no_limit = (size_t)-1;

    return mbstrnlen(s, no_limit);
}
|
|
|
|
|
2005-01-16 18:49:19 +00:00
|
|
|
#ifndef HAVE_STRNLEN
|
|
|
|
/* This function is equivalent to strnlen(). */
|
|
|
|
size_t nstrnlen(const char *s, size_t maxlen)
|
|
|
|
{
|
|
|
|
size_t n = 0;
|
|
|
|
|
|
|
|
assert(s != NULL);
|
|
|
|
|
2007-07-01 21:17:05 +00:00
|
|
|
for (; *s != '\0' && maxlen > 0; s++, maxlen--, n++)
|
2005-01-16 18:49:19 +00:00
|
|
|
;
|
|
|
|
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* This function is equivalent to strnlen() for multibyte strings:
 * return the number of characters in s, counting at most maxlen of
 * them.  When UTF-8 support is compiled out or not in use, this is
 * plain strnlen(). */
size_t mbstrnlen(const char *s, size_t maxlen)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        size_t n = 0;

        /* Test the limit before dereferencing s, so that nothing is
         * read beyond the maxlen characters the caller allowed.  Each
         * step advances s by what move_mbright() reports for the
         * character at its start. */
        for (; maxlen > 0 && *s != '\0'; s += move_mbright(s, 0),
                maxlen--, n++)
            ;

        return n;
    } else
#endif
        return strnlen(s, maxlen);
}
|
2005-03-15 05:44:03 +00:00
|
|
|
|
2006-01-06 07:10:30 +00:00
|
|
|
#if !defined(NANO_TINY) || !defined(DISABLE_JUSTIFY)
|
2005-07-21 18:05:27 +00:00
|
|
|
/* This function is equivalent to strchr() for multibyte strings. */
|
2006-01-06 05:54:44 +00:00
|
|
|
char *mbstrchr(const char *s, const char *c)
|
2005-07-21 18:05:27 +00:00
|
|
|
{
|
|
|
|
assert(s != NULL && c != NULL);
|
|
|
|
|
|
|
|
#ifdef ENABLE_UTF8
|
2006-04-12 15:27:40 +00:00
|
|
|
if (use_utf8) {
|
2005-11-15 18:42:56 +00:00
|
|
|
bool bad_s_mb = FALSE, bad_c_mb = FALSE;
|
2005-07-21 18:05:27 +00:00
|
|
|
char *s_mb = charalloc(MB_CUR_MAX);
|
|
|
|
const char *q = s;
|
|
|
|
wchar_t ws, wc;
|
|
|
|
|
2014-03-21 12:47:34 +00:00
|
|
|
if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
|
2015-09-04 19:34:55 +00:00
|
|
|
mbtowc_reset();
|
2005-07-21 18:05:27 +00:00
|
|
|
wc = (unsigned char)*c;
|
|
|
|
bad_c_mb = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (*s != '\0') {
|
2005-07-26 06:13:45 +00:00
|
|
|
int s_mb_len = parse_mbchar(s, s_mb, NULL);
|
2005-07-21 18:05:27 +00:00
|
|
|
|
2005-07-21 22:12:03 +00:00
|
|
|
if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
|
2015-09-04 19:34:55 +00:00
|
|
|
mbtowc_reset();
|
2005-07-21 18:05:27 +00:00
|
|
|
ws = (unsigned char)*s;
|
|
|
|
bad_s_mb = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bad_s_mb == bad_c_mb && ws == wc)
|
|
|
|
break;
|
|
|
|
|
|
|
|
s += s_mb_len;
|
|
|
|
q += s_mb_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(s_mb);
|
|
|
|
|
2005-11-15 19:01:07 +00:00
|
|
|
if (*s == '\0')
|
2005-07-21 18:05:27 +00:00
|
|
|
q = NULL;
|
|
|
|
|
|
|
|
return (char *)q;
|
|
|
|
} else
|
|
|
|
#endif
|
2011-02-24 02:47:25 +00:00
|
|
|
return (char *) strchr(s, *c);
|
2005-07-21 18:05:27 +00:00
|
|
|
}
|
2006-01-06 07:10:30 +00:00
|
|
|
#endif /* !NANO_TINY || !DISABLE_JUSTIFY */
|
2005-07-21 18:05:27 +00:00
|
|
|
|
2006-01-06 07:10:30 +00:00
|
|
|
#ifndef NANO_TINY
|
|
|
|
/* This function is equivalent to strpbrk() for multibyte strings. */
|
|
|
|
char *mbstrpbrk(const char *s, const char *accept)
|
|
|
|
{
|
|
|
|
assert(s != NULL && accept != NULL);
|
|
|
|
|
|
|
|
#ifdef ENABLE_UTF8
|
2006-04-12 15:27:40 +00:00
|
|
|
if (use_utf8) {
|
2007-07-09 22:36:32 +00:00
|
|
|
for (; *s != '\0'; s += move_mbright(s, 0)) {
|
2006-01-06 07:10:30 +00:00
|
|
|
if (mbstrchr(accept, s) != NULL)
|
|
|
|
return (char *)s;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
} else
|
|
|
|
#endif
|
2011-02-24 02:47:25 +00:00
|
|
|
return (char *) strpbrk(s, accept);
|
2006-01-06 07:10:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* This function is equivalent to strpbrk(), except in that it scans the
|
|
|
|
* string in reverse, starting at rev_start. */
|
|
|
|
char *revstrpbrk(const char *s, const char *accept, const char
|
|
|
|
*rev_start)
|
|
|
|
{
|
|
|
|
assert(s != NULL && accept != NULL && rev_start != NULL);
|
|
|
|
|
2016-06-25 19:04:19 +00:00
|
|
|
if (*rev_start == '\0') {
|
|
|
|
if (rev_start == s)
|
|
|
|
return NULL;
|
|
|
|
rev_start--;
|
|
|
|
}
|
2006-01-06 07:10:30 +00:00
|
|
|
|
2016-06-25 19:04:19 +00:00
|
|
|
for (; rev_start >= s; rev_start--) {
|
|
|
|
if (strchr(accept, *rev_start) != NULL)
|
2006-01-06 07:10:30 +00:00
|
|
|
return (char *)rev_start;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Multibyte counterpart of revstrpbrk(): scan s backwards, starting at
 * rev_start, for a character that occurs in accept.  Return a pointer
 * to the first such character found, or NULL when the head of s is
 * reached without a match.  When UTF-8 support is compiled out or not
 * in use, the work is handed to revstrpbrk(). */
char *mbrevstrpbrk(const char *s, const char *accept, const char
        *rev_start)
{
    assert(s != NULL && accept != NULL && rev_start != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        const char *spot = rev_start;
            /* The position currently being tried. */

        /* When starting on the terminating NUL, begin one character to
         * its left; an empty string has nothing to offer. */
        if (*spot == '\0') {
            if (spot == s)
                return NULL;
            spot = s + move_mbleft(s, spot - s);
        }

        while (mbstrchr(accept, spot) == NULL) {
            /* If we've reached the head of the string, we found nothing. */
            if (spot == s)
                return NULL;

            spot = s + move_mbleft(s, spot - s);
        }

        return (char *)spot;
    } else
#endif
        return revstrpbrk(s, accept, rev_start);
}
|
|
|
|
#endif /* !NANO_TINY */
|
|
|
|
|
2014-04-13 20:50:20 +00:00
|
|
|
#if !defined(DISABLE_NANORC) && (!defined(NANO_TINY) || !defined(DISABLE_JUSTIFY))
|
2005-06-14 01:55:56 +00:00
|
|
|
/* Return TRUE if the string s contains one or more blank characters,
|
|
|
|
* and FALSE otherwise. */
|
|
|
|
bool has_blank_chars(const char *s)
|
|
|
|
{
|
|
|
|
assert(s != NULL);
|
|
|
|
|
|
|
|
for (; *s != '\0'; s++) {
|
|
|
|
if (isblank(*s))
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return TRUE if the multibyte string s contains one or more blank
 * multibyte characters, and FALSE otherwise.  When UTF-8 support is
 * compiled out or not in use, this is has_blank_chars(). */
bool has_blank_mbchars(const char *s)
{
    assert(s != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
        char *chr_mb = charalloc(MB_CUR_MAX);
            /* Scratch buffer that parse_mbchar() fills for each
             * character. */
        bool found = FALSE;

        /* Examine one character at a time until a blank turns up or
         * the string runs out. */
        while (*s != '\0' && !found) {
            parse_mbchar(s, chr_mb, NULL);

            if (is_blank_mbchar(chr_mb))
                found = TRUE;
            else
                s += move_mbright(s, 0);
        }

        free(chr_mb);

        return found;
    } else
#endif
        return has_blank_chars(s);
}
|
2014-04-13 20:50:20 +00:00
|
|
|
#endif /* !DISABLE_NANORC && (!NANO_TINY || !DISABLE_JUSTIFY) */
|
2005-06-14 23:36:13 +00:00
|
|
|
|
2005-08-05 03:14:29 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2005-08-28 03:07:13 +00:00
|
|
|
/* Return TRUE if wc is valid Unicode, and FALSE otherwise.  Valid means
 * a value from 0 through 0x10FFFF that lies outside 0xD800-0xDFFF (the
 * surrogates), outside 0xFDD0-0xFDEF, and whose low sixteen bits are
 * neither 0xFFFE nor 0xFFFF. */
bool is_valid_unicode(wchar_t wc)
{
    const bool in_range = (wc >= 0 && wc <= 0x10FFFF);
    const bool is_surrogate = (wc >= 0xD800 && wc <= 0xDFFF);
    const bool in_excluded_block = (wc >= 0xFDD0 && wc <= 0xFDEF);
    const bool ends_a_plane = ((wc & 0xFFFF) >= 0xFFFE);

    return (in_range && !is_surrogate && !in_excluded_block &&
                !ends_a_plane);
}
|
|
|
|
#endif
|
|
|
|
|
2014-04-13 20:50:20 +00:00
|
|
|
#ifndef DISABLE_NANORC
|
2005-06-14 23:36:13 +00:00
|
|
|
/* Check if the string s is a valid multibyte string. Return TRUE if it
|
|
|
|
* is, and FALSE otherwise. */
|
|
|
|
bool is_valid_mbstring(const char *s)
|
|
|
|
{
|
|
|
|
assert(s != NULL);
|
|
|
|
|
2007-04-19 03:15:04 +00:00
|
|
|
return
|
2005-07-17 02:40:07 +00:00
|
|
|
#ifdef ENABLE_UTF8
|
2006-10-08 15:21:23 +00:00
|
|
|
use_utf8 ? (mbstowcs(NULL, s, 0) != (size_t)-1) :
|
2005-06-14 23:36:13 +00:00
|
|
|
#endif
|
|
|
|
TRUE;
|
|
|
|
}
|
2014-04-13 20:50:20 +00:00
|
|
|
#endif /* !DISABLE_NANORC */
|