120 lines
3.4 KiB
Python
120 lines
3.4 KiB
Python
# Copyright (c) 2024 Síle Ekaterin Liszka
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
import re
|
|
|
|
from collections.abc import Sequence, Mapping
|
|
|
|
# Dotted keys: one or more [A-Za-z0-9_-] segments joined by single dots.
KEY_RE = re.compile(r'^[a-zA-Z0-9_-]+(?:\.(?:[a-zA-Z0-9_-]+))*$')
# Values this pattern accepts are emitted bare by normalise_string();
# anything else is passed through quote().
NVAL_RE = re.compile(r'^[^{}=+:;,\n\'"]+$')
# The two characters recognised as string delimiters.
quotes = '\'"'
# Maps each quote character to the opposite one.
other = {'"': "'", "'": '"'}
# Built-in escape styles: style name -> {quote character: replacement}.
escapes = {
    'backslash': {'"': '\\x22', "'": '\\x27'},
    # HTML character references.  A literal quote here would make the style
    # a no-op, and a bare ''' would open a triple-quoted string.
    # NOTE(review): the numeric reference is used for the apostrophe;
    # confirm this is the spelling the project wants.
    'html': {'"': '&quot;', "'": '&#39;'},
    'url': {'"': '%22', "'": '%27'}
}
|
|
|
|
def quote(data, escape='backslash'):
    """Return *data* wrapped in quotes, escaping embedded quotes as needed.

    A string of at least two characters that starts and ends with the same
    quote character is treated as already quoted: it is returned unchanged
    unless that quote character also occurs inside it, in which case the
    inner occurrences are escaped.  Any other string is wrapped in a quote
    character it does not contain; when it contains both kinds (or neither),
    single quotes are escaped and the result is wrapped in single quotes.

    Args:
        data: The string to quote.
        escape: Either the name of a built-in style in ``escapes`` or a
            mapping that supplies a replacement for both ``'"'`` and ``"'"``.

    Returns:
        The quoted string.  An empty *data* yields ``"''"``.

    Raises:
        ValueError: If *escape* is neither a known style name nor a mapping
            containing both quote characters.
    """
    esc = None

    # how shall we escape embedded quotes?
    if isinstance(escape, Mapping):
        # a custom mapping must cover both quote characters
        if '"' in escape and "'" in escape:
            esc = escape
    elif escape in escapes:
        esc = escapes[escape]

    if esc is None:
        raise ValueError('unrecognised escape format')

    # there is no first character to inspect; emit an empty quoted string
    if not data:
        return "''"

    f = data[0]

    # is this a quoted string?  A lone quote character is not: the same
    # character cannot be both the opening and the closing delimiter.
    if f in quotes and len(data) > 1 and data[-1] == f:
        inner = data[1:-1]
        # return it if we don't need to do anything
        if f not in inner:
            return data
        # escape embedded quotes
        return f'{f}{inner.replace(f, esc[f])}{f}'

    # starts with a quote and the other kind isn't used: wrap in the other
    if f in quotes and other[f] not in data[1:]:
        q = other[f]
        return f'{q}{data}{q}'

    # not a quoted string, but has only one kind of quote
    if "'" in data and '"' not in data:
        return f'"{data}"'
    if '"' in data and "'" not in data:
        return f"'{data}'"

    # both kinds of quote (or none at all): escape the single quotes and
    # wrap in single quotes
    escaped = data.replace("'", esc["'"])
    return f"'{escaped}'"
|
|
|
|
def normalise_string(string):
    """Return *string* as text, quoted when it cannot be written bare.

    Input that is not a ``str`` is first converted with ``str()``.  The text
    is returned untouched when ``NVAL_RE`` accepts it; otherwise it is passed
    through ``quote`` with that function's default escape style.
    """
    text = string if isinstance(string, str) else str(string)
    return text if NVAL_RE.match(text) else quote(text)
|
|
|
|
def _normalise_items(items):
    """Return the normalised scalars of *items*, unwinding nested sequences."""
    flat = []
    for item in items:
        if isinstance(item, str) or not isinstance(item, (Sequence, Mapping)):
            flat.append(normalise_string(item))
        # we can unwind nested sequences; recurse on the list form so the
        # pieces stay separate until the single join in normalise()
        elif isinstance(item, Sequence):
            flat.extend(_normalise_items(item))
        # ...but we can't do that with mappings, the format doesn't
        # support it.
        else:
            raise ValueError('nested mapping')
    return flat


def normalise(data):
    """Normalise *data* into the textual form used for values.

    * A string, or anything that is neither a sequence nor a mapping, is
      handed to ``normalise_string``.
    * A sequence is flattened (nested sequences are unwound) and its
      normalised items are joined with ``', '`` into one string.
    * A mapping becomes a ``dict`` with the same keys and recursively
      normalised values.

    Raises:
        ValueError: If a mapping appears inside a sequence.
        KeyError: If a mapping key is not a string accepted by ``KEY_RE``.
    """
    if isinstance(data, str) or not isinstance(data, (Sequence, Mapping)):
        return normalise_string(data)

    if isinstance(data, Sequence):
        return ', '.join(_normalise_items(data))

    # only mappings reach this point
    result = {}
    for k, v in data.items():
        # NOTE(review): keys are validated against the module's KEY_RE and
        # kept as plain strings; no ``Key`` type is defined or imported in
        # this file.
        if not isinstance(k, str) or KEY_RE.match(k) is None:
            raise KeyError(k)
        result[k] = normalise(v)
    return result
|
|
|
|
# Names exported by a star-import of this module.
__all__ = [
    'quote',
    'normalise',
    'normalise_string',
]
|