# py-xbc/src/xbc/utils.py

# Copyright (c) 2024 Síle Ekaterin Liszka
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import re

from collections.abc import Sequence, Mapping

# Matches a key: one or more words made of ASCII letters, digits,
# underscores or hyphens, joined by single dots.
KEY_RE  = re.compile(r'^[a-zA-Z0-9_-]+(?:\.(?:[a-zA-Z0-9_-]+))*$')
# Matches a non-empty value that contains none of the characters
# { } = + : ; , newline, single quote or double quote.
NVAL_RE = re.compile(r'^[^{}=+:;,\n\'"]+$')
# the two quote characters recognised by quote()
quotes  = '\'"'
# maps each quote character to the opposite kind
other   = {'"': "'", "'": '"'}
# built-in escape tables, keyed by format name; each table maps a quote
# character to the text that replaces it when it must be escaped
escapes = {
	'backslash': {'"': '\\x22', "'": '\\x27'},
	'html': {'"': '&quot;', "'": '&apos;'},
	'url': {'"': '%22', "'": '%27'}
}

def quote(data, escape='backslash'):
	"""Return ``data`` wrapped in quotes, escaping quotes where required.

	A string that already starts and ends with the same quote character
	is treated as quoted: it is returned unchanged unless that quote
	character also appears inside it, in which case the inner
	occurrences are escaped. Any other string is wrapped in whichever
	quote character it does not contain. If it contains both kinds (or
	neither), single quotes are escaped and the result is wrapped in
	single quotes.

	Parameters:
		data: the string to quote.
		escape: either the name of a built-in table in ``escapes``
			('backslash', 'html' or 'url'), or a mapping that has both
			quote characters as keys and their replacements as values.

	Returns:
		The quoted string.

	Raises:
		ValueError: if ``escape`` is neither a known format name nor a
			mapping containing both quote characters.
	"""
	esc = None

	# how shall we escape embedded quotes?
	if isinstance(escape, Mapping):
		# a custom table is only usable if it covers both quote kinds
		if '"' in escape and "'" in escape:
			esc = escape
	elif escape in escapes:
		esc = escapes[escape]

	if esc is None:
		raise ValueError('unrecognised escape format')

	# an empty value has no first character to inspect; emit an empty
	# pair of quotes, matching the default wrapping used below
	if not data:
		return "''"

	f = data[0]

	# is this a quoted string? a lone quote character is not: it would
	# be both the opening and the closing quote at once
	if len(data) > 1 and f in quotes and data[-1] == f:
		inner = data[1:-1]
		# return it if we don't need to do anything
		if f not in inner:
			return data
		# escape embedded quotes
		return f'{f}{inner.replace(f, esc[f])}{f}'

	has_single = "'" in data
	has_double = '"' in data

	# only one kind of quote is present; wrap in the other kind
	if has_single and not has_double:
		return f'"{data}"'
	if has_double and not has_single:
		return f"'{data}'"

	# both kinds or neither; escape single quotes (a no-op when there
	# are none) and wrap in single quotes
	data = data.replace("'", esc["'"])
	return f"'{data}'"

def normalise_string(string):
	"""Return ``string`` as text, quoted if it cannot be written bare.

	Non-string input is converted with ``str()`` first. Text that
	matches ``NVAL_RE`` is returned as-is; anything else is passed
	through ``quote()`` with its default escape format.
	"""
	text = string if isinstance(string, str) else str(string)

	# bare values need no further treatment
	if NVAL_RE.match(text):
		return text

	return quote(text)

def _normalise_items(items):
	"""Return the normalised strings for ``items``, flattening nested sequences."""
	out = []

	for item in items:
		# nested mappings can't be unwound, the format doesn't support it
		if isinstance(item, Mapping):
			raise ValueError('nested mapping')
		# strings are sequences too, but must be treated as scalars
		if isinstance(item, Sequence) and not isinstance(item, str):
			out.extend(_normalise_items(item))
		else:
			out.append(normalise_string(item))

	return out

def normalise(data):
	"""Normalise ``data`` into its textual form.

	Parameters:
		data: a scalar, a sequence or a mapping.

	Returns:
		* for a string or any non-sequence, non-mapping value: the
		  result of ``normalise_string(data)``;
		* for a sequence: its items normalised and joined with ', ',
		  with nested sequences flattened into the same list;
		* for a mapping: a new dict with the same keys and each value
		  normalised recursively.

	Raises:
		ValueError: if a sequence contains a mapping.
		KeyError: if a mapping key is not a string matching ``KEY_RE``.
	"""
	if isinstance(data, str) or not isinstance(data, (Sequence, Mapping)):
		return normalise_string(data)

	if isinstance(data, Sequence):
		return ', '.join(_normalise_items(data))

	# only mappings remain at this point
	d = {}
	for k, v in data.items():
		# reject non-string keys explicitly instead of letting the
		# regex raise TypeError
		if not isinstance(k, str) or KEY_RE.match(k) is None:
			raise KeyError(k)

		d[k] = normalise(v)
	return d

# names exported by a wildcard import of this module
__all__ = ['quote', 'normalise', 'normalise_string']
initial commit 2024-01-12 20:57:01 +00:00			`# Copyright (c) 2024 Síle Ekaterin Liszka`
			`#`
			`# Permission is hereby granted, free of charge, to any person obtaining`
			`# a copy of this software and associated documentation files (the`
			`# "Software"), to deal in the Software without restriction, including`
			`# without limitation the rights to use, copy, modify, merge, publish,`
			`# distribute, sublicense, and/or sell copies of the Software, and to`
			`# permit persons to whom the Software is furnished to do so, subject to`
			`# the following conditions:`
			`#`
			`# The above copyright notice and this permission notice shall be`
			`# included in all copies or substantial portions of the Software.`
			`#`
			`# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,`
			`# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF`
			`# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.`
			`# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY`
			`# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,`
			`# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE`
			`# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.`

			`import re`

			`from collections.abc import Sequence, Mapping`

			`KEY_RE = re.compile(r'^[a-zA-Z0-9_-]+(?:\.(?:[a-zA-Z0-9_-]+))*$')`
			`NVAL_RE = re.compile(r'^[^{}=+:;,\n\'"]+$')`
			`quotes = '\'"'`
			`other = {'"': "'", "'": '"'}`
			`escapes = {`
			`'backslash': {'"': '\\x22', "'": '\\x27'},`
			`'html': {'"': '"', "'": '''},`
			`'url': {'"': '%22', "'": '%27'}`
			`}`

			`def quote(data, escape='backslash'):`
			`esc = None`

			`# how shall we escape embedded quotes?`
			`if isinstance(esc, Mapping):`
			`if '"' in escape and "'" in escape:`
			`esc = escape`
			`elif escape in escapes:`
			`esc = escapes[escape]`

			`if esc is None:`
			`raise ValueError('unrecognised escape format')`

			`f = data[0]`

			`# is this a quoted string?`
			`if f in quotes and data[-1] == f:`
			`# return it if we don't need to do anything`
			`if f not in data[1:-1]:`
			`return data`
			`else:`
			`# escape embedded quotes`
			`x = data[1:-1].replace(f, esc[f])`
			`return f'{f}{x}{f}'`
			`else:`
			`# if the other quote isn't used, wrap in it`
			`if f in quotes and other[f] not in data[1:]:`
			`q = other[f]`
			`return f'{q}{data}{q}'`
			`# not a quoted string, but has only one kind of quote`
			`elif "'" in data and '"' not in data:`
			`return f'"{data}"'`
			`elif '"' in data and "'" not in data:`
			`return f"'{data}'"`
			`# not a quoted string and has both types; we escape one`
			`else:`
			`data = data.replace("'", esc["'"])`
			`return f"'{data}"`

			`def normalise_string(string):`
			`if not isinstance(string, str):`
			`string = str(string)`

			`if NVAL_RE.match(string) is None:`
			`string = quote(string)`

			`return string`

			`def normalise(data):`
			`if isinstance(data, str) or not isinstance(data, (Sequence, Mapping)):`
			`return normalise_string(data)`
			`elif isinstance(data, Sequence):`
			`L = []`

			`for item in data:`
			`if isinstance(item, str) or not isinstance(item, (Sequence, Mapping)):`
			`L.append(normalise_string(item))`
			`# we can unwind nested sequences`
			`elif isinstance(item, Sequence):`
			`L.extend(normalise(item))`
			`# ...but we can't do that with mappings, the format doesn't`
			`# support it.`
			`elif isinstance(item, Mapping):`
			`raise ValueError('nested mapping')`
			`else:`
			`raise ValueError(type(value))`
			`L = ', '.join(L)`
			`return L`
			`elif isinstance(data, Mapping):`
			`d = {}`
			`for k, v in data.items():`
			`if Key.pattern.match(k) is None:`
			`raise KeyError(k)`
			`else:`
			`k = Key(k)`

			`v = normalise(v)`

			`d[k] = v`
			`return d`

			`return data`

			`__all__ = ['quote', 'normalise', 'normalise_string']`