Correctly sanitize MathML out of post content (#27107)
parent
48f3ed738f
commit
7f4858b339
|
@ -64,6 +64,44 @@ class Sanitize
|
||||||
current_node.wrap('<p></p>')
|
current_node.wrap('<p></p>')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# We assume that incoming <math> nodes are of the form
# <math><semantics>...<annotation>...</annotation></semantics></math>
# according to the [FEP]. We try to grab the most relevant plain-text
# annotation from the semantics node, and use it to display a representation
# of the mathematics.
#
# A LaTeX ('application/x-tex') annotation is preferred and is wrapped in
# `$...$` (or `$$...$$` when the <math> element has display="block"); a
# 'text/plain' annotation is used verbatim as a fallback. When neither is
# present the node is left untouched for the remaining transformers.
#
# FEP: https://codeberg.org/fediverse/fep/src/branch/main/fep/dc88/fep-dc88.md
MATH_TRANSFORMER = lambda do |env|
  math = env[:node]
  return if env[:is_allowlisted]
  return unless math.element? && env[:node_name] == 'math'

  # Only the first element child is inspected; anything other than
  # <semantics> does not match the expected shape.
  semantics = math.element_children[0]
  return if semantics.nil? || semantics.name != 'semantics'

  # Finds the first <annotation> child carrying the given encoding.
  # Annotations come from untrusted input and may lack an `encoding`
  # attribute entirely, hence the safe navigation: such nodes simply
  # never match instead of raising NoMethodError on nil.
  find_annotation = lambda do |encoding|
    semantics.children.find do |node|
      node.name == 'annotation' && node.attributes['encoding']&.value == encoding
    end
  end

  annotation = find_annotation.call('application/x-tex')
  if annotation
    delimiter = math.attributes['display']&.value == 'block' ? '$$' : '$'
    math.replace(math.document.create_text_node("#{delimiter}#{annotation.text}#{delimiter}"))
    return
  end

  # Don't bother surrounding 'text/plain' annotations with dollar signs,
  # since it isn't LaTeX
  annotation = find_annotation.call('text/plain')
  math.replace(math.document.create_text_node(annotation.text)) if annotation
end
|
||||||
|
|
||||||
MASTODON_STRICT = freeze_config(
|
MASTODON_STRICT = freeze_config(
|
||||||
elements: %w(p br span a del s pre blockquote code b strong u i em ul ol li ruby rt rp),
|
elements: %w(p br span a del s pre blockquote code b strong u i em ul ol li ruby rt rp),
|
||||||
|
|
||||||
|
@ -86,6 +124,7 @@ class Sanitize
|
||||||
transformers: [
|
transformers: [
|
||||||
ALLOWED_CLASS_TRANSFORMER,
|
ALLOWED_CLASS_TRANSFORMER,
|
||||||
TRANSLATE_TRANSFORMER,
|
TRANSLATE_TRANSFORMER,
|
||||||
|
MATH_TRANSFORMER,
|
||||||
UNSUPPORTED_ELEMENTS_TRANSFORMER,
|
UNSUPPORTED_ELEMENTS_TRANSFORMER,
|
||||||
UNSUPPORTED_HREF_TRANSFORMER,
|
UNSUPPORTED_HREF_TRANSFORMER,
|
||||||
]
|
]
|
||||||
|
|
|
@ -57,5 +57,25 @@ RSpec.describe Sanitize::Config do
|
||||||
it 'keeps a with supported scheme and no host' do
|
it 'keeps a with supported scheme and no host' do
|
||||||
expect(Sanitize.fragment('<a href="dweb:/a/foo">Test</a>', subject)).to eq '<a href="dweb:/a/foo" rel="nofollow noopener noreferrer" target="_blank">Test</a>'
|
expect(Sanitize.fragment('<a href="dweb:/a/foo">Test</a>', subject)).to eq '<a href="dweb:/a/foo" rel="nofollow noopener noreferrer" target="_blank">Test</a>'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'sanitizes math to LaTeX' do
  # Inline <math> with a TeX annotation should collapse to `$...$`.
  input = '<math><semantics><mrow><msup><mi>x</mi><mi>n</mi></msup><mo>+</mo><mi>y</mi></mrow><annotation encoding="application/x-tex">x^n+y</annotation></semantics></math>'
  sanitized = Sanitize.fragment(input, subject)
  expect(sanitized).to eq '$x^n+y$'
end
|
||||||
|
|
||||||
|
it 'sanitizes math blocks to LaTeX' do
  # display="block" on <math> should switch the delimiters to `$$...$$`.
  input = '<math display="block"><semantics><mrow><msup><mi>x</mi><mi>n</mi></msup><mo>+</mo><mi>y</mi></mrow><annotation encoding="application/x-tex">x^n+y</annotation></semantics></math>'
  sanitized = Sanitize.fragment(input, subject)
  expect(sanitized).to eq '$$x^n+y$$'
end
|
||||||
|
|
||||||
|
it 'math sanitizer falls back to plaintext' do
  # With only a text/plain annotation, its text is emitted without dollar signs.
  input = '<math><semantics><msqrt><mi>x</mi></msqrt><annotation encoding="text/plain">sqrt(x)</annotation></semantics></math>'
  sanitized = Sanitize.fragment(input, subject)
  expect(sanitized).to eq 'sqrt(x)'
end
|
||||||
|
|
||||||
|
it 'prefers latex' do
  # The TeX annotation wins even when a text/plain annotation comes first.
  input = '<math><semantics><msqrt><mi>x</mi></msqrt><annotation encoding="text/plain">sqrt(x)</annotation><annotation encoding="application/x-tex">\\sqrt x</annotation></semantics></math>'
  sanitized = Sanitize.fragment(input, subject)
  expect(sanitized).to eq '$\sqrt x$'
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue