pkgconf/libpkgconf/parser.c

107 lines
2.9 KiB
C
Raw Normal View History

/*
* parser.c
* rfc822 message parser
*
* Copyright (c) 2018 pkgconf authors (see AUTHORS).
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* This software is provided 'as is' and without any warranty, express or
* implied. In no event shall the authors be liable for any damages arising
* from the use of this software.
*/
#include <libpkgconf/config.h>
#include <libpkgconf/stdinc.h>
#include <libpkgconf/libpkgconf.h>
/*
* !doc
*
* .. c:function:: pkgconf_pkg_t *pkgconf_pkg_new_from_file(const pkgconf_client_t *client, const char *filename, FILE *f)
*
* Parse a .pc file into a pkgconf_pkg_t object structure.
*
* :param pkgconf_client_t* client: The pkgconf client object to use for dependency resolution.
* :param char* filename: The filename of the package file (including full path).
* :param FILE* f: The file object to read from.
* :returns: A ``pkgconf_pkg_t`` object which contains the package data.
* :rtype: pkgconf_pkg_t *
*/
void
pkgconf_parser_parse(FILE *f, void *data, const pkgconf_parser_operand_func_t *ops, const pkgconf_parser_warn_func_t warnfunc, const char *filename)
{
char readbuf[PKGCONF_BUFSIZE];
size_t lineno = 0;
while (pkgconf_fgetline(readbuf, PKGCONF_BUFSIZE, f) != NULL)
{
char op, *p, *key, *value;
bool warned_key_whitespace = false, warned_value_whitespace = false;
lineno++;
p = readbuf;
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
while (*p && isspace((unsigned char)*p))
p++;
if (*p && p != readbuf)
{
warnfunc(data, "%s:" SIZE_FMT_SPECIFIER ": warning: whitespace encountered while parsing key section\n",
filename, lineno);
warned_key_whitespace = true;
}
key = p;
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
while (*p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || *p == '_' || *p == '.'))
p++;
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
if (!isalpha((unsigned char)*key) &&
!isdigit((unsigned char)*p))
continue;
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
while (*p && isspace((unsigned char)*p))
{
if (!warned_key_whitespace)
{
warnfunc(data, "%s:" SIZE_FMT_SPECIFIER ": warning: whitespace encountered while parsing key section\n",
filename, lineno);
warned_key_whitespace = true;
}
/* set to null to avoid trailing spaces in key */
*p = '\0';
p++;
}
op = *p;
if (*p != '\0')
{
*p = '\0';
p++;
}
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
while (*p && isspace((unsigned char)*p))
p++;
value = p;
p = value + (strlen(value) - 1);
Avoid undefined behaviour with the ctype(3) functions. fix https://github.com/pkgconf/pkgconf/issues/291 As defined in the C standard: In all cases the argument is an int, the value of which shall be representable as an unsigned char or shall equal the value of the macro EOF. If the argument has any other value, the behavior is undefined. This is because they're designed to work with the int values returned by getc or fgetc; they need extra work to handle a char value. If EOF is -1 (as it almost always is), with 8-bit bytes, the allowed inputs to the ctype(3) functions are: {-1, 0, 1, 2, 3, ..., 255}. However, on platforms where char is signed, such as x86 with the usual ABI, code like char *ptr = ...; ... isspace(*ptr) ... may pass in values in the range: {-128, -127, -126, ..., -2, -1, 0, 1, ..., 127}. This has two problems: 1. Inputs in the set {-128, -127, -126, ..., -2} are forbidden. 2. The non-EOF byte 0xff is conflated with the value EOF = -1, so even though the input is not forbidden, it may give the wrong answer. Casting char to unsigned int first before passing the result to ctype(3) doesn't help: inputs like -128 are unchanged by this cast, because (on a two's-complement machine with 32-bit int and unsigned int), converting the signed char with integer value -128 to unsigned int gives integer value 2^32 - 128 = 0xffffff80, which is out of range, and which is converted in int back to -128, which is also out of range. It is necessary to cast char inputs to unsigned char first; you can then cast to unsigned int if you like but there's no need because the functions will always convert the argument to int by definition. So the above fragment needs to be: char *ptr = ...; ... isspace((unsigned char)*ptr) ... This patch changes unsigned int casts to unsigned char casts, and adds unsigned char casts where they are missing.
2023-03-17 19:32:58 +00:00
while (*p && isspace((unsigned char) *p) && p > value)
{
if (!warned_value_whitespace && op == '=')
{
warnfunc(data, "%s:" SIZE_FMT_SPECIFIER ": warning: trailing whitespace encountered while parsing value section\n",
filename, lineno);
warned_value_whitespace = true;
}
*p = '\0';
p--;
}
if (ops[(unsigned char) op])
ops[(unsigned char) op](data, lineno, key, value);
}
fclose(f);
}