From 212c85863a4ec1cdd5bae053985879b56f363a97 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 17 Mar 2023 19:32:58 +0000 Subject: [PATCH] Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing. --- cli/bomtool/main.c | 2 +- cli/main.c | 5 +++-- libpkgconf/argvsplit.c | 2 +- libpkgconf/dependency.c | 6 +++--- libpkgconf/libpkgconf.h | 2 +- libpkgconf/parser.c | 13 +++++++------ libpkgconf/personality.c | 2 +- libpkgconf/pkg.c | 14 +++++++------- libpkgconf/win-dirent.h | 15 ++++++++------- tests/lib1/utf8.pc | 10 ++++++++++ 10 files changed, 42 insertions(+), 29 deletions(-) create mode 100644 tests/lib1/utf8.pc diff --git a/cli/bomtool/main.c b/cli/bomtool/main.c index 1400e94..a743b14 100644 --- a/cli/bomtool/main.c +++ b/cli/bomtool/main.c @@ -309,7 +309,7 @@ main(int argc, char *argv[]) if (maximum_package_count > 0 && pkgq.length > maximum_package_count) break; - while (isspace((unsigned int)package[0])) + while (isspace((unsigned char)package[0])) package++; /* skip empty packages */ diff --git a/cli/main.c b/cli/main.c index 6b7e403..2aca647 100644 --- a/cli/main.c +++ b/cli/main.c @@ -407,7 +407,8 @@ apply_env(pkgconf_client_t *client, pkgconf_pkg_t *world, void *env_prefix_p, in char workbuf[PKGCONF_ITEM_SIZE]; for (it = want_env_prefix; *it != '\0'; it++) - if (!isalpha(*it) && !isdigit(*it)) + if (!isalpha((unsigned char)*it) && + !isdigit((unsigned char)*it)) return false; snprintf(workbuf, sizeof workbuf, "%s_CFLAGS", want_env_prefix); @@ -1327,7 +1328,7 @@ cleanup3: if (maximum_package_count > 0 && pkgq.length > maximum_package_count) break; - while (isspace((unsigned int)package[0])) + while (isspace((unsigned char)package[0])) package++; /* skip empty packages */ diff --git a/libpkgconf/argvsplit.c b/libpkgconf/argvsplit.c index b47a3d9..9b98e12 100644 --- a/libpkgconf/argvsplit.c +++ b/libpkgconf/argvsplit.c @@ -108,7 +108,7 @@ pkgconf_argv_split(const char *src, int *argc, char ***argv) else *dst_iter++ = *src_iter; } - else if (isspace((unsigned int)*src_iter)) + else if (isspace((unsigned char)*src_iter)) { if ((*argv)[argc_count] != NULL) { diff --git a/libpkgconf/dependency.c b/libpkgconf/dependency.c index 149d95b..da92c3a 100644 --- a/libpkgconf/dependency.c +++ b/libpkgconf/dependency.c @@ -329,11 +329,11 @@ pkgconf_dependency_parse_str(pkgconf_client_t *client, pkgconf_list_t *deplist_h break; case INSIDE_MODULE_NAME: - if (isspace((unsigned int)*ptr)) + if (isspace((unsigned char)*ptr)) { const char *sptr = ptr; - while (*sptr && isspace((unsigned int)*sptr)) + while (*sptr && isspace((unsigned char)*sptr)) sptr++; if (*sptr == '\0') @@ -397,7 +397,7 @@ pkgconf_dependency_parse_str(pkgconf_client_t *client, pkgconf_list_t *deplist_h break; case AFTER_OPERATOR: - if (!isspace((unsigned int)*ptr)) + if (!isspace((unsigned char)*ptr)) { vstart = ptr; state = INSIDE_VERSION; diff --git a/libpkgconf/libpkgconf.h b/libpkgconf/libpkgconf.h index e72170a..4f39fe7 100644 --- a/libpkgconf/libpkgconf.h +++ b/libpkgconf/libpkgconf.h @@ -249,7 +249,7 @@ PKGCONF_API pkgconf_cross_personality_t *pkgconf_cross_personality_default(void) PKGCONF_API pkgconf_cross_personality_t *pkgconf_cross_personality_find(const char *triplet); PKGCONF_API void pkgconf_cross_personality_deinit(pkgconf_cross_personality_t *personality); -#define PKGCONF_IS_MODULE_SEPARATOR(c) ((c) == ',' || isspace ((unsigned int)(c))) +#define PKGCONF_IS_MODULE_SEPARATOR(c) ((c) == ',' || isspace ((unsigned char)(c))) #define PKGCONF_IS_OPERATOR_CHAR(c) ((c) == '<' || (c) == '>' || (c) == '!' || (c) == '=') #define PKGCONF_PKG_PKGF_NONE 0x0000 diff --git a/libpkgconf/parser.c b/libpkgconf/parser.c index da376c6..f4cf7db 100644 --- a/libpkgconf/parser.c +++ b/libpkgconf/parser.c @@ -44,7 +44,7 @@ pkgconf_parser_parse(FILE *f, void *data, const pkgconf_parser_operand_func_t *o lineno++; p = readbuf; - while (*p && isspace((unsigned int)*p)) + while (*p && isspace((unsigned char)*p)) p++; if (*p && p != readbuf) { @@ -53,13 +53,14 @@ pkgconf_parser_parse(FILE *f, void *data, const pkgconf_parser_operand_func_t *o warned_key_whitespace = true; } key = p; - while (*p && (isalpha((unsigned int)*p) || isdigit((unsigned int)*p) || *p == '_' || *p == '.')) + while (*p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || *p == '_' || *p == '.')) p++; - if (!isalpha((unsigned int)*key) && !isdigit((unsigned int)*p)) + if (!isalpha((unsigned char)*key) && + !isdigit((unsigned char)*p)) continue; - while (*p && isspace((unsigned int)*p)) + while (*p && isspace((unsigned char)*p)) { if (!warned_key_whitespace) { @@ -80,12 +81,12 @@ pkgconf_parser_parse(FILE *f, void *data, const pkgconf_parser_operand_func_t *o p++; } - while (*p && isspace((unsigned int)*p)) + while (*p && isspace((unsigned char)*p)) p++; value = p; p = value + (strlen(value) - 1); - while (*p && isspace((unsigned int) *p) && p > value) + while (*p && isspace((unsigned char) *p) && p > value) { if (!warned_value_whitespace && op == '=') { diff --git a/libpkgconf/personality.c b/libpkgconf/personality.c index 6706416..9f88d0f 100644 --- a/libpkgconf/personality.c +++ b/libpkgconf/personality.c @@ -148,7 +148,7 @@ valid_triplet(const char *triplet) const char *c = triplet; for (; *c; c++) - if (!isalnum(*c) && *c != '-' && *c != '_') + if (!isalnum((unsigned char)*c) && *c != '-' && *c != '_') return false; return true; diff --git a/libpkgconf/pkg.c b/libpkgconf/pkg.c index ad98eb0..40565a4 100644 --- a/libpkgconf/pkg.c +++ b/libpkgconf/pkg.c @@ -857,9 +857,9 @@ pkgconf_compare_version(const char *a, const char *b) while (*one || *two) { - while (*one && !isalnum((unsigned int)*one) && *one != '~') + while (*one && !isalnum((unsigned char)*one) && *one != '~') one++; - while (*two && !isalnum((unsigned int)*two) && *two != '~') + while (*two && !isalnum((unsigned char)*two) && *two != '~') two++; if (*one == '~' || *two == '~') @@ -880,22 +880,22 @@ pkgconf_compare_version(const char *a, const char *b) str1 = one; str2 = two; - if (isdigit((unsigned int)*str1)) + if (isdigit((unsigned char)*str1)) { - while (*str1 && isdigit((unsigned int)*str1)) + while (*str1 && isdigit((unsigned char)*str1)) str1++; - while (*str2 && isdigit((unsigned int)*str2)) + while (*str2 && isdigit((unsigned char)*str2)) str2++; isnum = true; } else { - while (*str1 && isalpha((unsigned int)*str1)) + while (*str1 && isalpha((unsigned char)*str1)) str1++; - while (*str2 && isalpha((unsigned int)*str2)) + while (*str2 && isalpha((unsigned char)*str2)) str2++; isnum = false; diff --git a/libpkgconf/win-dirent.h b/libpkgconf/win-dirent.h index a2e847a..0734d6d 100644 --- a/libpkgconf/win-dirent.h +++ b/libpkgconf/win-dirent.h @@ -912,7 +912,7 @@ static int strverscmp(const char *a, const char *b) /* Count backwards and find the leftmost digit */ j = i; - while (j > 0 && isdigit(a[j-1])) { + while (j > 0 && isdigit((unsigned char)a[j-1])) { --j; } @@ -924,23 +924,24 @@ static int strverscmp(const char *a, const char *b) } /* String with more digits is smaller, e.g 002 < 01 */ - if (isdigit(a[j])) { - if (!isdigit(b[j])) { + if (isdigit((unsigned char)a[j])) { + if (!isdigit((unsigned char)b[j])) { return -1; } - } else if (isdigit(b[j])) { + } else if ((unsigned char)isdigit(b[j])) { return 1; } - } else if (isdigit(a[j]) && isdigit(b[j])) { + } else if ((unsigned char)isdigit(a[j]) && + isdigit((unsigned char)b[j])) { /* Numeric comparison */ size_t k1 = j; size_t k2 = j; /* Compute number of digits in each string */ - while (isdigit(a[k1])) { + while (isdigit((unsigned char)a[k1])) { k1++; } - while (isdigit(b[k2])) { + while (isdigit((unsigned char)b[k2])) { k2++; } diff --git a/tests/lib1/utf8.pc b/tests/lib1/utf8.pc new file mode 100644 index 0000000..6507ec0 --- /dev/null +++ b/tests/lib1/utf8.pc @@ -0,0 +1,10 @@ +prefix=/tëst +exec_prefix=${prefix} +libdir=${prefix}/lib +includedir=${prefix}/include + +Name: utf8 +Description: Library installed in a prefix with UTF-8 +Version: 0 +Libs: -L${libdir} -lutf8 +Cflags: -I${includedir}