# frozen_string_literal: true

require 'singleton'

#  See also `app/javascript/features/account/util/bio_metadata.js`.

class FrontmatterHandler
  include Singleton

  #  CONVENIENCE FUNCTIONS  #

  def self.unirex(str)
    Regexp.new str, Regexp::MULTILINE, 'u'
  end
  def self.rexstr(exp)
    '(?:' + exp.source + ')'
  end

  #  CHARACTER CLASSES  #

  DOCUMENT_START    = /^/
  DOCUMENT_END      = /$/
  ALLOWED_CHAR      =  #  c-printable` in the YAML 1.2 spec.
    /[\t\n\r\u{20}-\u{7e}\u{85}\u{a0}-\u{d7ff}\u{e000}-\u{fffd}\u{10000}-\u{10ffff}]/u
  WHITE_SPACE       = /[ \t]/
  INDENTATION       = / */
  LINE_BREAK        = /\r?\n|\r|<br\s*\/?>/
  ESCAPE_CHAR       = /[0abt\tnvfre "\/\\N_LP]/
  HEXADECIMAL_CHARS = /[0-9a-fA-F]/
  INDICATOR         = /[-?:,\[\]{}&#*!|>'"%@`]/
  FLOW_CHAR         = /[,\[\]{}]/

  #  NEGATED CHARACTER CLASSES  #

  NOT_WHITE_SPACE   = unirex '(?!' + rexstr(WHITE_SPACE) + ').'
  NOT_LINE_BREAK    = unirex '(?!' + rexstr(LINE_BREAK) + ').'
  NOT_INDICATOR     = unirex '(?!' + rexstr(INDICATOR) + ').'
  NOT_FLOW_CHAR     = unirex '(?!' + rexstr(FLOW_CHAR) + ').'
  NOT_ALLOWED_CHAR  = unirex '(?!' + rexstr(ALLOWED_CHAR) + ').'

  #  BASIC CONSTRUCTS  #

  ANY_WHITE_SPACE   = unirex rexstr(WHITE_SPACE) + '*'
  ANY_ALLOWED_CHARS = unirex rexstr(ALLOWED_CHAR) + '*'
  NEW_LINE          = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK)
  )
  SOME_NEW_LINES    = unirex(
    '(?:' + rexstr(ANY_WHITE_SPACE) + rexstr(LINE_BREAK) + ')+'
  )
  POSSIBLE_STARTS   = unirex(
    rexstr(DOCUMENT_START) + rexstr(/<p[^<>]*>/) + '?'
  )
  POSSIBLE_ENDS     = unirex(
    rexstr(SOME_NEW_LINES) + '|' +
    rexstr(DOCUMENT_END) + '|' +
    rexstr(/<\/p>/)
  )
  CHARACTER_ESCAPE  = unirex(
    rexstr(/\\/) +
    '(?:' +
      rexstr(ESCAPE_CHAR) + '|' +
      rexstr(/x/) + rexstr(HEXADECIMAL_CHARS) + '{2}' + '|' +
      rexstr(/u/) + rexstr(HEXADECIMAL_CHARS) + '{4}' + '|' +
      rexstr(/U/) + rexstr(HEXADECIMAL_CHARS) + '{8}' +
    ')'
  )
  ESCAPED_CHAR      = unirex(
    rexstr(/(?!["\\])/) + rexstr(NOT_LINE_BREAK) + '|' +
    rexstr(CHARACTER_ESCAPE)
  )
  ANY_ESCAPED_CHARS = unirex(
    rexstr(ESCAPED_CHAR) + '*'
  )
  ESCAPED_APOS      = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' + rexstr(/[^']|''/)
  )
  ANY_ESCAPED_APOS  = unirex(
    rexstr(ESCAPED_APOS) + '*'
  )
  FIRST_KEY_CHAR    = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    rexstr(NOT_INDICATOR) + '|' +
    rexstr(/[?:-]/) +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    '(?=' + rexstr(NOT_FLOW_CHAR) + ')'
  )
  FIRST_VALUE_CHAR  = unirex(
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    rexstr(NOT_INDICATOR) + '|' +
    rexstr(/[?:-]/) +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
    #  Flow indicators are allowed in values.
  )
  LATER_KEY_CHAR    = unirex(
    rexstr(WHITE_SPACE) + '|' +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    '(?=' + rexstr(NOT_FLOW_CHAR) + ')' +
    rexstr(/[^:#]#?/) + '|' +
    rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  )
  LATER_VALUE_CHAR  = unirex(
    rexstr(WHITE_SPACE) + '|' +
    '(?=' + rexstr(NOT_LINE_BREAK) + ')' +
    '(?=' + rexstr(NOT_WHITE_SPACE) + ')' +
    #  Flow indicators are allowed in values.
    rexstr(/[^:#]#?/) + '|' +
    rexstr(/:/) + '(?=' + rexstr(NOT_WHITE_SPACE) + ')'
  )

  #  YAML CONSTRUCTS  #

  YAML_START        = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(/---/)
  )
  YAML_END          = unirex(
    rexstr(ANY_WHITE_SPACE) + rexstr(/(?:---|\.\.\.)/)
  )
  YAML_LOOKAHEAD    = unirex(
    '(?=' +
      rexstr(YAML_START) +
      rexstr(ANY_ALLOWED_CHARS) + rexstr(NEW_LINE) +
      rexstr(YAML_END) + rexstr(POSSIBLE_ENDS) +
    ')'
  )
  YAML_DOUBLE_QUOTE = unirex(
    rexstr(/"/) + rexstr(ANY_ESCAPED_CHARS) + rexstr(/"/)
  )
  YAML_SINGLE_QUOTE = unirex(
    rexstr(/'/) + rexstr(ANY_ESCAPED_APOS) + rexstr(/'/)
  )
  YAML_SIMPLE_KEY   = unirex(
    rexstr(FIRST_KEY_CHAR) + rexstr(LATER_KEY_CHAR) + '*'
  )
  YAML_SIMPLE_VALUE = unirex(
    rexstr(FIRST_VALUE_CHAR) + rexstr(LATER_VALUE_CHAR) + '*'
  )
  YAML_KEY          = unirex(
    rexstr(YAML_DOUBLE_QUOTE) + '|' +
    rexstr(YAML_SINGLE_QUOTE) + '|' +
    rexstr(YAML_SIMPLE_KEY)
  )
  YAML_VALUE        = unirex(
    rexstr(YAML_DOUBLE_QUOTE) + '|' +
    rexstr(YAML_SINGLE_QUOTE) + '|' +
    rexstr(YAML_SIMPLE_VALUE)
  )
  YAML_SEPARATOR    = unirex(
    rexstr(ANY_WHITE_SPACE) +
    ':' + rexstr(WHITE_SPACE) +
    rexstr(ANY_WHITE_SPACE)
  )
  YAML_LINE         = unirex(
    '(' + rexstr(YAML_KEY) + ')' +
    rexstr(YAML_SEPARATOR) +
    '(' + rexstr(YAML_VALUE) + ')'
  )

  #  FRONTMATTER REGEX  #

  YAML_FRONTMATTER  = unirex(
    rexstr(POSSIBLE_STARTS) +
    rexstr(YAML_LOOKAHEAD) +
    rexstr(YAML_START) + rexstr(SOME_NEW_LINES) +
    '(?:' +
      '(' + rexstr(INDENTATION) + ')' +
      rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
      '(?:' +
        '\\1' + rexstr(YAML_LINE) + rexstr(SOME_NEW_LINES) +
      '){0,4}' +
    ')?' +
    rexstr(YAML_END) + rexstr(POSSIBLE_ENDS)
  )

  #  SEARCHES  #

  FIND_YAML_LINES   = unirex(
    rexstr(NEW_LINE) + rexstr(INDENTATION) + rexstr(YAML_LINE)
  )

  #  STRING PROCESSING  #

  def process_string(str)
    case str[0]
    when '"'
      str[1..-2]
        .gsub(/\\0/, "\u{00}")
        .gsub(/\\a/, "\u{07}")
        .gsub(/\\b/, "\u{08}")
        .gsub(/\\t/, "\u{09}")
        .gsub(/\\\u{09}/, "\u{09}")
        .gsub(/\\n/, "\u{0a}")
        .gsub(/\\v/, "\u{0b}")
        .gsub(/\\f/, "\u{0c}")
        .gsub(/\\r/, "\u{0d}")
        .gsub(/\\e/, "\u{1b}")
        .gsub(/\\ /, "\u{20}")
        .gsub(/\\"/, "\u{22}")
        .gsub(/\\\//, "\u{2f}")
        .gsub(/\\\\/, "\u{5c}")
        .gsub(/\\N/, "\u{85}")
        .gsub(/\\_/, "\u{a0}")
        .gsub(/\\L/, "\u{2028}")
        .gsub(/\\P/, "\u{2029}")
        .gsub(/\\x([0-9a-fA-F]{2})/mu) {|s| $1.to_i.chr Encoding::UTF_8}
        .gsub(/\\u([0-9a-fA-F]{4})/mu) {|s| $1.to_i.chr Encoding::UTF_8}
        .gsub(/\\U([0-9a-fA-F]{8})/mu) {|s| $1.to_i.chr Encoding::UTF_8}
    when "'"
      str[1..-2].gsub(/''/, "'")
    else
      str
    end
  end

  #  BIO PROCESSING  #

  def process_bio content
    result = {
      text: content.gsub(/&quot;/, '"').gsub(/&apos;/, "'"),
      metadata: []
    }
    yaml = YAML_FRONTMATTER.match(result[:text])
    return result unless yaml
    yaml = yaml[0]
    start = YAML_START =~ result[:text]
    ending = start + yaml.length - (YAML_START =~ yaml)
    result[:text][start..ending - 1] = ''
    metadata = nil
    index = 0
    while metadata = FIND_YAML_LINES.match(yaml, index) do
      index = metadata.end(0)
      result[:metadata].push [
        process_string(metadata[1]), process_string(metadata[2])
      ]
    end
    return result
  end

end