From 13b9e1e7dc518f14ca71b4005565610d9d23a334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=ADle=20Ekaterin=20Liszka?= Date: Fri, 12 Jan 2024 12:57:01 -0800 Subject: [PATCH] initial commit --- .gitignore | 4 + LICENSE | 20 +++ README.md | 72 ++++++++ pyproject.toml | 36 ++++ tests/01-keyvalue.xbc | 2 + tests/02-block-keyvalue.xbc | 4 + tests/03-keyvalue-string.xbc | 1 + tests/10-compact.xbc | 1 + tests/11-config.xbc | 21 +++ tests/12-config2.xbc | 19 ++ tests/40-invalid1.xbc | 5 + tests/test_basic.py | 37 ++++ xbc/__init__.py | 330 +++++++++++++++++++++++++++++++++++ xbc/utils.py | 119 +++++++++++++ 14 files changed, 671 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 tests/01-keyvalue.xbc create mode 100644 tests/02-block-keyvalue.xbc create mode 100644 tests/03-keyvalue-string.xbc create mode 100644 tests/10-compact.xbc create mode 100644 tests/11-config.xbc create mode 100644 tests/12-config2.xbc create mode 100644 tests/40-invalid1.xbc create mode 100644 tests/test_basic.py create mode 100644 xbc/__init__.py create mode 100644 xbc/utils.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fba2add --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +__pycache__/ +build/ +dist/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..06c72e0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2024 Síle Ekaterin Liszka + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall 
be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7887594 --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +# py-xbc + +`py-xbc` is a pure-Python library for reading and writing files in the +eXtra BootConfig (XBC) file format specified by the Linux kernel. This +is not a strictly-conforming implementation: in particular, this +implementation does not enforce the 32,767-byte ceiling on XBC files, +nor does it enforce the 16-level cap on keys and blocks. + +# Requirements + +`py-xbc` currently requires `pyparsing` and Python 3.7+. + +# Usage + +`py-xbc` exports four functions: + +- `loads_xbc` parses a string. +- `load_xbc` opens a file and then parses a string. +- `saves_xbc` renders to a string. +- `save_xbc` renders to a string and writes the string to a file. + +## Format + +XBC files consist of a series of statements, of which there are three +kinds: + +- A key is a sequence of one or more bytes in the range `a-zA-Z0-9_-`. + They are namespaced with periods (`.`) and may be followed by an + equals sign (`=`). Key statements are terminated by a semicolon (`;`), + a linefeed, or a semicolon followed by a linefeed. + +- A key/value statement is a key followed by an operator, followed in + turn by one or more values. There are three operators: + + - Assignment (`=`) specifies an initial value. + - Update (`:=`) overwrites whatever value was previously there. + - Append (`+=`) appends one or more values. 
+ + There are two kinds of values: strings and arrays. Strings can be + either 'bare' or quoted. + + - Bare strings are a sequence of one or more bytes that are not in the + range `{}#=+:;,\n'" `. + - Quoted strings are a sequence of bytes that begins with a single + quote (`'`) or a double quote (`"`) and ends only with the same + quote. Quotes cannot be escaped. + - Arrays are a sequence of one or more values delimited by a comma + (`,`). + +- A block is a key followed by a pair of curly braces, inside which is + one or more key or key/value statements. + +Keys are composable. The following examples are equivalent: + +```xbc +foo { + bar { + fluff = 1 + } +} +# is equivalent to +foo.bar.fluff = 1 +# is equivalent to +foo.bar { fluff = 1 } +# is equivalent to +foo { bar.fluff = 1 } +``` + +# Licence + +`py-xbc` is published under the MIT license. See `LICENSE` for more +information. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8ffe300 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,36 @@ +[project] +name = 'py-xbc' +version = '0.1.0' +authors = [ + { name = 'Síle Ekaterin Liszka', email = 'sheila@vulpine.house' } +] +description = 'A library for manipulating eXtra BootConfig (XBC) files' +readme = 'README.md' +keywords = ['bootconfig', 'xbc', 'configuration'] +dependencies = [ + 'pyparsing', + 'pytest' +] +requires-python = '>=3.7' +classifiers = [ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Topic :: Utilities' +] + +[project.urls] +Homepage = 'https://gitea.treehouse.systems/VulpineAmethyst/py-xbc' +Issues = 
'https://gitea.treehouse.systems/VulpineAmethyst/py-xbc/issues' + +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" diff --git a/tests/01-keyvalue.xbc b/tests/01-keyvalue.xbc new file mode 100644 index 0000000..a76b551 --- /dev/null +++ b/tests/01-keyvalue.xbc @@ -0,0 +1,2 @@ +feature.option.foo = 1 +feature.option.bar = 2 diff --git a/tests/02-block-keyvalue.xbc b/tests/02-block-keyvalue.xbc new file mode 100644 index 0000000..a594066 --- /dev/null +++ b/tests/02-block-keyvalue.xbc @@ -0,0 +1,4 @@ +feature.option { + foo = 1 + bar = 2 +} \ No newline at end of file diff --git a/tests/03-keyvalue-string.xbc b/tests/03-keyvalue-string.xbc new file mode 100644 index 0000000..3439539 --- /dev/null +++ b/tests/03-keyvalue-string.xbc @@ -0,0 +1 @@ +feature.options = "foo", "bar" diff --git a/tests/10-compact.xbc b/tests/10-compact.xbc new file mode 100644 index 0000000..c9046d1 --- /dev/null +++ b/tests/10-compact.xbc @@ -0,0 +1 @@ +feature.option{foo=1;bar=2} \ No newline at end of file diff --git a/tests/11-config.xbc b/tests/11-config.xbc new file mode 100644 index 0000000..2b0966f --- /dev/null +++ b/tests/11-config.xbc @@ -0,0 +1,21 @@ +ftrace.event { + task.task_newtask { + filter = "pid < 128" + enable + } + kprobes.vfs_read { + probes = "vfs_read $arg1 $arg2" + filter = "common_pid < 200" + enable + } + synthetic.initcall_latency { + fields = "unsigned long func", "u64 lat" + actions = "hist:keys=func.sym,lat:vals=lat:sort=lat" + } + initcall.initcall_start { + actions = "hist:keys=func:ts0=common_timestamp.usecs" + } + initcall.initcall_finish { + actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)" + } +} \ No newline at end of file diff --git a/tests/12-config2.xbc b/tests/12-config2.xbc new file mode 100644 index 0000000..aaa6ffa --- /dev/null +++ b/tests/12-config2.xbc @@ -0,0 +1,19 @@ +ftrace.event.synthetic.initcall_latency { + fields = "unsigned 
def test_key():
    """A bare key parses to an entry whose value is True."""
    parsed = loads_xbc('a')
    assert parsed == {'a': True}


def test_keyvalue():
    """A bare value is returned as a string."""
    parsed = loads_xbc('a = 1')
    assert parsed == {'a': '1'}


def test_keys():
    """Two keys separated by a semicolon both appear in the result."""
    parsed = loads_xbc('a;b')
    assert parsed == {'a': True, 'b': True}


def test_string():
    """A double-quoted value keeps its surrounding quotes."""
    parsed = loads_xbc('a = "b"')
    assert parsed == {'a': '"b"'}


def test_array():
    """Comma-separated values come back as a list of strings."""
    parsed = loads_xbc('a = 1, 2')
    assert parsed == {'a': ['1', '2']}


def test_block():
    """A block maps its own key to False and its members to dotted keys."""
    parsed = loads_xbc('a { a = 1 }')
    assert parsed == {'a': False, 'a.a': '1'}


def test_block2():
    """A key may carry a value and also be opened as a block afterwards."""
    parsed = loads_xbc('a = 1\na { a = 1 }')
    assert parsed == {'a': '1', 'a.a': '1'}


def test_reassignment():
    """Assigning the same key twice with `=` raises ParseError."""
    source = 'a = 1\na = 2'
    with pytest.raises(ParseError):
        loads_xbc(source)


def test_ovewrite_nonexistent():
    """Updating a key that was never assigned raises ParseError."""
    source = 'a := 1'
    with pytest.raises(ParseError):
        loads_xbc(source)


def test_assign_after_block():
    """Assigning to a key after it was used as a block raises ParseError."""
    source = 'a { a = 1 }\na = 1'
    with pytest.raises(ParseError):
        loads_xbc(source)
limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +""" +This module provides utilities for reading and writing files in the +Linux kernel's bootconfig format. This is not a strictly-conformant +implementation. In particular, this implementation does not restrict +key/block depths and makes no attempt whatsoever to ensure output is +under 32,767 bytes as mandated by the Linux kernel implementation. + +XBC has three types of configuration: + +- Keys are sequences of one or more characters in the range + `[a-zA-Z0-9_-]`, namespaced with dots. They can be specified without a + value, allowing their presence to be a boolean. +- Key/value pairs have a key component, an operator, and one or more + values. Values have three flavours: + - Bare values are sequences of one or more characters that are not in + the range `{}#=+:;,\\n'" `. + - Quoted strings are bounded by either single or double quotes, and + cannot contain the quote being bounded. Escaped quotes (`\\'` and + `\\"`) are not supported. + - Arrays are bare values or strings delimited by commas. +- Blocks have a key component and a sequence of keys, key/value pairs, + or blocks. 
class Node:
    """Base class for the objects created by the grammar's parse actions.

    ``args`` is the token sequence backing the node; element 0 is the
    node's key.  ``type`` is a short tag (``'key'``, ``'kv'`` or
    ``'block'``) supplied by the subclasses.
    """

    def __init__(self, *args, type=None):
        # KeyValue and Block forward their own ``*args`` tuple as a single
        # positional argument, so the tokens may sit one, two or three
        # levels down.  A string found at the first or second level is
        # stored as-is; otherwise whatever is at the third level is stored.
        outer = args[0]
        if isinstance(outer, str):
            self.args = outer
        elif isinstance(outer[0], str):
            self.args = outer[0]
        else:
            self.args = outer[0][0]
        self.type = type

    def __repr__(self):
        # Make parse trees readable in debuggers and assertion output.
        return f'{self.__class__.__name__}({self.args!r})'

    @property
    def key(self):
        """The first element of ``args``."""
        return self.args[0]


class Key(Node):
    """A key statement without a value.

    The constructor deliberately bypasses ``Node.__init__``: that method
    would reduce a one-element token list such as ``['abc']`` to the bare
    string ``'abc'``, after which ``key`` would return only ``'a'``.
    """

    def __init__(self, *args):
        self.args = args[0]
        self.type = 'key'


class KeyValue(Node):
    """A ``key <op> values`` statement; ``args`` is ``[key, op, values]``."""

    def __init__(self, *args):
        super().__init__(args, type='kv')

    @property
    def op(self):
        """The operator token (second element of ``args``)."""
        return self.args[1]

    @property
    def value(self):
        """The value tokens (third element of ``args``)."""
        return self.args[2]


class Block(Node):
    """A ``key { ... }`` statement; ``args`` is ``[key, contents]``."""

    def __init__(self, *args):
        super().__init__(args, type='block')

    @property
    def contents(self):
        """The statements inside the braces (second element of ``args``)."""
        return self.args[1]
class ParseError(Exception):
    """Raised by :func:`parse_block` for conflicting statements.

    This covers defining a key that already exists and updating or
    appending to a key that does not.
    """


def lex(data):
    """Parse the string *data* with ``XBCParser`` and return the tree as a list."""
    # NOTE(review): parseString() is called without parse_all=True; per
    # the pyparsing docs that lets parsing stop before the end of the
    # input without raising.  Confirm whether unparsed trailing text
    # should be rejected instead.
    return XBCParser.parseString(data).asList()


def _flatten(seq):
    """Yield the node objects in *seq*, descending into nested lists."""
    for item in seq:
        if isinstance(item, list):
            yield from _flatten(item)
        else:
            yield item


def parse_block(key, seq, ret=None):
    """Flatten a statement tree into a mapping of dotted keys.

    :param key: dotted prefix of the enclosing block, or ``None`` at the
        top level.
    :param seq: iterable of ``Key``/``KeyValue``/``Block`` nodes; nested
        lists are flattened.
    :param ret: mapping to fill in.  Nested blocks share the caller's
        mapping so that redefinition checks and the ``:=``/``+=``
        operators see keys defined outside the block.  A new dict is
        created when omitted.
    :returns: the mapping.  Bare keys map to ``True``, block keys without
        a value map to ``False``, single values map to a string and
        multiple values map to a list.
    :raises ParseError: if a key is defined twice, or if ``:=``/``+=`` is
        applied to a key that is not defined.
    """
    if ret is None:
        ret = {}

    for item in _flatten(seq):
        k = item.key if key is None else f'{key}.{item.key}'

        if isinstance(item, Key):
            if k in ret:
                raise ParseError(f'key {k} already defined')
            ret[k] = True
        elif isinstance(item, KeyValue):
            value = item.value
            op = item.op

            if op == '=':
                if k in ret:
                    raise ParseError(f'key {k} already defined')
                assign = value
            else:
                if k not in ret:
                    raise ParseError(f'key {k} not defined')

                if op == '+=':
                    current = ret[k]
                    if isinstance(current, bool):
                        # A bare key (True) or block marker (False) has
                        # no value yet, so there is nothing to keep.
                        assign = []
                    elif isinstance(current, str):
                        assign = [current]
                    else:
                        assign = list(current)
                    if isinstance(value, str):
                        assign.append(value)
                    else:
                        assign.extend(value)
                else:
                    assign = value

            # A one-element array is stored as a plain string.
            if isinstance(assign, list) and len(assign) == 1:
                assign = assign[0]

            ret[k] = assign
        elif isinstance(item, Block):
            # Mark the block key itself unless it already has a value.
            ret.setdefault(k, False)
            parse_block(k, item.contents, ret)

    return ret


def parse(data):
    """Lex the string *data* and flatten the result with :func:`parse_block`."""
    return parse_block(None, lex(data))


def loads_xbc(data):
    """Parse the XBC text *data* and return a mapping of dotted keys."""
    return parse(data)


def load_xbc(fp):
    """Read the file at path *fp* as UTF-8 and parse its contents."""
    with open(fp, mode='r', encoding='utf-8') as f:
        return loads_xbc(f.read())


def longest_key(L):
    """Return the dotted common prefix of the split keys in *L*.

    :param L: list of keys, each already split into a list of segments.
    :returns: the joined common leading segments (``''`` when the first
        segments already differ), or ``None`` when *L* is empty, when any
        key has no segments, or when the shortest key is matched in full
        by every other key.
    """
    if not L:
        return None

    shortest = min(len(x) for x in L)
    if shortest < 1:
        return None

    ret = []
    for i in range(shortest):
        if len({item[i] for item in L}) != 1:
            return '.'.join(ret)
        ret.append(L[0][i])
    # No differing segment was found within the shortest key.
    return None


def longest_keys(keys):
    """Return the set of non-empty pairwise common prefixes of *keys*.

    :param keys: iterable of dotted key strings.
    :returns: set of dotted prefixes as computed by :func:`longest_key`
        for every pair of distinct entries.
    """
    split = [k.split('.') for k in keys]
    ret = set()

    for i, first in enumerate(split):
        for second in split[i + 1:]:
            prefix = longest_key([first, second])
            if prefix:
                ret.add(prefix)
    return ret


def _collapse_prefix(head, members):
    """Extend *head* while every rendered member shares one more segment.

    ``False``-valued members without a dot are ignored because they are
    never rendered.  Returns the block key and the members relative to it.
    """
    while True:
        live = {k: v for k, v in members.items() if '.' in k or v is not False}
        firsts = {k.partition('.')[0] for k in live}
        if len(live) < 2 or len(firsts) != 1 or not all('.' in k for k in live):
            return head, members
        head = f'{head}.{firsts.pop()}'
        members = {k.partition('.')[2]: v for k, v in live.items()}


def make_block(data):
    """Render the mapping *data* as a list of XBC source lines.

    Keys without a dot are emitted first: ``True`` becomes a bare key,
    ``False`` is skipped, and anything else becomes ``key = value`` with
    the value passed through ``normalise``.  Dotted keys are then grouped
    by their first segment, in order of first appearance, and each group
    is rendered as a tab-indented block.

    :param data: mapping of (possibly dotted) keys to values.
    :returns: list of lines without trailing newlines.
    """
    ret = []
    groups = {}

    for key, value in data.items():
        head, dot, rest = key.partition('.')
        if dot:
            groups.setdefault(head, {})[rest] = value
        elif value is True:
            ret.append(key)
        elif value is not False:
            ret.append(f'{key} = {normalise(value)}')

    for head, members in groups.items():
        head, members = _collapse_prefix(head, members)
        ret.append(head + ' {')
        ret.extend(f'\t{line}' for line in make_block(members))
        ret.append('}')

    return ret


def saves_xbc(data):
    """Render the mapping *data* as XBC text, one statement per line."""
    return '\n'.join(make_block(data))


def save_xbc(data, filename):
    """Render *data* with :func:`saves_xbc` and write it to *filename* as UTF-8."""
    with open(filename, mode='w', encoding='utf-8') as f:
        f.write(saves_xbc(data))
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Helpers for turning Python values into XBC value text."""

import re

from collections.abc import Sequence, Mapping

KEY_RE = re.compile(r'^[a-zA-Z0-9_-]+(?:\.(?:[a-zA-Z0-9_-]+))*$')
# Characters that may not appear in a bare (unquoted) value.
NVAL_RE = re.compile(r'^[^ {}#=+:;,\n\'"]+$')
quotes = '\'"'
other = {'"': "'", "'": '"'}
escapes = {
    'backslash': {'"': '\\x22', "'": '\\x27'},
    # NOTE(review): the original entity literals were lost when this file
    # was extracted; the hex forms mirror the other two tables -- confirm.
    # The ampersand is spelled \x26 so that the entities survive tools
    # that decode HTML entities in source text.
    'html': {'"': '\x26quot;', "'": '\x26#x27;'},
    'url': {'"': '%22', "'": '%27'}
}


def quote(data, escape='backslash'):
    """Return *data* wrapped in quotes so it can be written as an XBC value.

    :param data: the string to quote.  A string that already starts and
        ends with the same quote character is kept, with any embedded
        occurrences of that quote escaped.
    :param escape: name of a built-in escape table (``'backslash'``,
        ``'html'`` or ``'url'``), or a mapping that provides replacements
        for both ``'"'`` and ``"'"``.
    :returns: the quoted string; the empty string becomes ``'""'``.
    :raises ValueError: if *escape* is not a known name or a mapping with
        both quote characters.
    """
    esc = None

    # how shall we escape embedded quotes?
    if isinstance(escape, Mapping):
        if '"' in escape and "'" in escape:
            esc = escape
    elif isinstance(escape, str):
        esc = escapes.get(escape)

    if esc is None:
        raise ValueError('unrecognised escape format')

    if not data:
        return '""'

    f = data[0]

    # is this a quoted string?  (a lone quote character is not)
    if len(data) >= 2 and f in quotes and data[-1] == f:
        inner = data[1:-1]
        # return it if we don't need to do anything
        if f not in inner:
            return data
        # escape embedded quotes
        return f'{f}{inner.replace(f, esc[f])}{f}'

    # starts with a quote and the other quote isn't used: wrap in it
    if f in quotes and other[f] not in data[1:]:
        q = other[f]
        return f'{q}{data}{q}'
    # not a quoted string, but has only one kind of quote
    if "'" in data and '"' not in data:
        return f'"{data}"'
    if '"' in data and "'" not in data:
        return f"'{data}'"
    # no quotes at all, or both kinds: escape single quotes and wrap
    data = data.replace("'", esc["'"])
    return f"'{data}'"


def normalise_string(string):
    """Return *string* as a bare value if possible, otherwise quoted.

    Non-string arguments are converted with ``str()`` first.
    """
    if not isinstance(string, str):
        string = str(string)

    if NVAL_RE.fullmatch(string) is None:
        string = quote(string)

    return string


def _flatten_values(seq):
    """Return the normalised scalars in *seq*, unwinding nested sequences."""
    flat = []

    for item in seq:
        if isinstance(item, str) or not isinstance(item, (Sequence, Mapping)):
            flat.append(normalise_string(item))
        # we can unwind nested sequences
        elif isinstance(item, Sequence):
            flat.extend(_flatten_values(item))
        # ...but we can't do that with mappings, the format doesn't
        # support it.
        else:
            raise ValueError('nested mapping')
    return flat


def normalise(data):
    """Convert *data* into XBC value text.

    :param data: a scalar, a sequence of values, or a mapping.
    :returns: for a scalar, the result of :func:`normalise_string`; for a
        sequence, the flattened items joined with ``', '``; for a
        mapping, a new dict with the same keys and normalised values.
    :raises ValueError: if a sequence contains a mapping.
    :raises KeyError: if a mapping key is not a valid XBC key.
    """
    if isinstance(data, str) or not isinstance(data, (Sequence, Mapping)):
        return normalise_string(data)

    if isinstance(data, Sequence):
        return ', '.join(_flatten_values(data))

    d = {}
    for k, v in data.items():
        if not isinstance(k, str) or KEY_RE.fullmatch(k) is None:
            raise KeyError(k)
        d[k] = normalise(v)
    return d


__all__ = ['quote', 'normalise', 'normalise_string']