From 0be93820f344fafc8618febde149e9c63d1ba5d4 Mon Sep 17 00:00:00 2001 From: Thibaut Girka Date: Sun, 12 May 2019 22:13:36 +0200 Subject: [PATCH] Fix autolinking, and newlines in code blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Autolinking is now performed *after* the Markdown pass, by replacing HTML tags with zero-width spaces and running the twitter-text extractor as usual, except it does not auto-link URLs to avoid links in links… --- app/lib/formatter.rb | 65 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index eaece879709..2c509ef19fc 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -3,6 +3,17 @@ require 'singleton' require_relative './sanitize_config' +class HTMLRenderer < Redcarpet::Render::HTML + def block_code(code, language) + "
#{code.gsub("\n", "
")}
" + end + + def autolink(link, link_type) + return link if link_type == :email + Formatter.instance.link_url(link) + end +end + class Formatter include Singleton include RoutingHelper @@ -39,15 +50,18 @@ class Formatter html = format_markdown(html) if status.content_type == 'text/markdown' html = encode_and_link_urls(html, linkable_accounts, keep_html: %w(text/markdown text/html).include?(status.content_type)) html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify] - html = simple_format(html, {}, sanitize: false) unless %w(text/markdown text/html).include?(status.content_type) - html = html.delete("\n") + + unless %w(text/markdown text/html).include?(status.content_type) + html = simple_format(html, {}, sanitize: false) + html = html.delete("\n") + end html.html_safe # rubocop:disable Rails/OutputSafety end def format_markdown(html) extensions = { - autolink: false, + autolink: true, no_intra_emphasis: true, fenced_code_blocks: true, disable_indented_code_blocks: true, @@ -57,11 +71,12 @@ class Formatter superscript: true, underline: true, highlight: true, - footnotes: true + footnotes: false, } - renderer = Redcarpet::Render::HTML.new({ + renderer = HTMLRenderer.new({ filter_html: false, + escape_html: false, no_images: true, no_styles: true, safe_links_only: true, @@ -72,14 +87,7 @@ class Formatter markdown = Redcarpet::Markdown.new(renderer, extensions) html = reformat(markdown.render(html)) - html = html.gsub("\r\n", "\n").gsub("\r", "\n") - code_safe_strip(html) - end - - def code_safe_strip(html, char="\n") - html = html.split(/(].*?\/code>)/m) - html.each_slice(2) { |part| part[0].delete!(char) } - html.join + html.delete("\r").delete("\n") end def reformat(html) @@ -136,6 +144,10 @@ class Formatter html.html_safe # rubocop:disable Rails/OutputSafety end + def link_url(url) + "#{link_html(url)}" + end + private def html_entities @@ -147,13 +159,13 @@ class Formatter end def encode_and_link_urls(html, accounts = nil, options = {}) - entities = utf8_friendly_extractor(html, extract_url_without_protocol: false) - if accounts.is_a?(Hash) options = accounts accounts = nil end + entities = options[:keep_html] ? html_friendly_extractor(html) : utf8_friendly_extractor(html, extract_url_without_protocol: false) + rewrite(html.dup, entities, options[:keep_html]) do |entity| if entity[:url] link_to_url(entity, options) @@ -285,6 +297,29 @@ class Formatter Extractor.remove_overlapping_entities(special + standard) end + def html_friendly_extractor(html, options = {}) + gaps = [] + total_offset = 0 + + escaped = html.gsub(/<[^>]*>/) do |match| + total_offset += match.length - 1 + end_offset = Regexp.last_match.end(0) + gaps << [end_offset - total_offset, total_offset] + "\u200b" + end + + entities = Extractor.extract_hashtags_with_indices(escaped, :check_url_overlap => false) + + Extractor.extract_mentions_or_lists_with_indices(escaped) + Extractor.remove_overlapping_entities(entities).map do |extract| + pos = extract[:indices].first + offset_idx = gaps.rindex { |gap| gap.first <= pos } + offset = offset_idx.nil? ? 0 : gaps[offset_idx].last + next extract.merge( + :indices => [extract[:indices].first + offset, extract[:indices].last + offset] + ) + end + end + def link_to_url(entity, options = {}) url = Addressable::URI.parse(entity[:url]) html_attrs = { target: '_blank', rel: 'nofollow noopener' }