Change links in webUI to rewrite misleading links (#11426)
* [WiP] Show host for “misleading” links
* Disallow misleading targets whose domain names are prefixes of the link text
* Move decodeIDNA to app/javascript/mastodon/utils
* Add support for international domain names
* Change link origin tag color to darker text color
* Handle links to domains starting with www. as shortened by Mastodon
* [WiP] Ignore links that cannot be misread as URLs, rewrite other links
main
parent
78144f4c79
commit
b31b232edf
|
@ -8,9 +8,71 @@ import classnames from 'classnames';
|
||||||
import PollContainer from 'mastodon/containers/poll_container';
|
import PollContainer from 'mastodon/containers/poll_container';
|
||||||
import Icon from 'mastodon/components/icon';
|
import Icon from 'mastodon/components/icon';
|
||||||
import { autoPlayGif } from 'mastodon/initial_state';
|
import { autoPlayGif } from 'mastodon/initial_state';
|
||||||
|
import { decode as decodeIDNA } from 'mastodon/utils/idna';
|
||||||
|
|
||||||
const MAX_HEIGHT = 642; // 20px * 32 (+ 2px padding at the top)
|
const MAX_HEIGHT = 642; // 20px * 32 (+ 2px padding at the top)
|
||||||
|
|
||||||
|
// Regex matching what "looks like a link", that is, something that starts with
|
||||||
|
// an optional "http://" or "https://" scheme and then what could look like a
|
||||||
|
// domain name, that is, at least two sequences of characters not including spaces
|
||||||
|
// and separated by "." or a homoglyph. The idea is not to match valid URLs or
|
||||||
|
// domain names, but what could be confused for a valid URL or domain name,
|
||||||
|
// especially to the untrained eye.
|
||||||
|
|
||||||
|
// Homoglyph tables: each string lists the characters that can be mistaken for
// one ASCII character of an "http(s)://" prefix or for the "." between domain
// labels. They are interpolated into character classes below.
//
// Code points above U+FFFF MUST be written as `\u{…}`: a plain `\u` escape
// consumes exactly four hex digits, so e.g. `\u1d58d` would silently become
// U+1D58 followed by a literal "d" and make ordinary letters and digits count
// as homoglyphs.
const h_confusables = 'h\u13c2\u{1d58d}\u{1d4f1}\u{1d691}\u0068\uff48\u{1d525}\u210e\u{1d489}\u{1d629}\u0570\u{1d4bd}\u{1d65d}\u{1d421}\u{1d5c1}\u{1d5f5}\u04bb\u{1d559}';
const t_confusables = 't\u{1d42d}\u{1d5cd}\u{1d531}\u{1d565}\u{1d4c9}\u{1d669}\u{1d4fd}\u{1d69d}\u0074\u{1d461}\u{1d601}\u{1d495}\u{1d635}\u{1d599}';
const p_confusables = 'p\u0440\u03c1\u{1d52d}\u{1d631}\u{1d665}\u{1d429}\uff50\u{1d6e0}\u{1d45d}\u{1d561}\u{1d595}\u{1d71a}\u{1d699}\u{1d78e}\u2ca3\u{1d754}\u{1d6d2}\u{1d491}\u{1d7c8}\u{1d746}\u{1d4c5}\u{1d70c}\u{1d5c9}\u0070\u{1d780}\u03f1\u{1d5fd}\u2374\u{1d7ba}\u{1d4f9}';
const s_confusables = 's\u{1d530}\u{118c1}\u{1d494}\u{1d634}\u{1d4c8}\u{1d668}\uabaa\u{1d42c}\u{1d5cc}\u{1d460}\u{1d600}\ua731\u0073\uff53\u{1d564}\u0455\u{1d598}\u{1d4fc}\u{1d69c}\u{10448}\u01bd';
// Look-alikes of the colon (":") that follows the scheme name.
const column_confusables = ':\u0903\u0a83\u0703\u1803\u05c3\u0704\u0589\u1809\ua789\u16ec\ufe30\u02d0\u2236\u02f8\u003a\uff1a\u205a\ua4fd';
const slash_confusables = '/\u2041\u2f03\u2044\u2cc6\u27cb\u30ce\u002f\u2571\u31d3\u3033\u1735\u2215\u29f8\u{1d23a}\u4e3f';
const dot_confusables = '.\u002e\u0660\u06f0\u0701\u0702\u2024\ua4f8\ua60e\u{10a50}\u{1d16d}';

// Matches text that "looks like a link": optional leading whitespace, an
// optional (possibly homoglyph-spelled) "http://" or "https://" prefix, then
// at least two runs of non-space, non-":" and non-"/" characters separated by
// a dot look-alike. The `u` flag makes each astral homoglyph a single class
// member instead of two independent surrogate halves.
const linkRegex = new RegExp(
  `^\\s*(([${h_confusables}][${t_confusables}][${t_confusables}][${p_confusables}][${s_confusables}]?[${column_confusables}][${slash_confusables}][${slash_confusables}]))?[^:/\\n ]+([${dot_confusables}][^:/\\n ]+)+`,
  'u',
);
|
||||||
|
|
||||||
|
/**
 * Tells whether the visible text of a link is an honest rendering of its
 * target, given the target's origin and host.
 *
 * Accepted forms are the bare origin or host, either of them followed by a
 * path (possibly truncated), and the same with a leading "www." dropped from
 * the text (Mastodon hides "https://www." when it shortens links).
 *
 * @param {string} text - Visible link text.
 * @param {string} origin - Target origin, e.g. "https://www.example.com".
 * @param {string} host - Target host, e.g. "www.example.com".
 * @returns {boolean} `true` when the text points at the given target.
 */
const textMatchesTarget = (text, origin, host) => {
  const wwwText = `www.${text}`;

  return text === origin
    || text === host
    || text.startsWith(`${origin}/`)
    || text.startsWith(`${host}/`)
    || wwwText === host
    || wwwText.startsWith(`${host}/`);
};

/**
 * Decides whether a link's visible text could be mistaken for a URL other
 * than the one the link actually points to.
 *
 * @param {HTMLAnchorElement} link - Anchor element to inspect; its `href` and
 *   child nodes are read, nothing is modified.
 * @returns {boolean} `true` when the visible text looks like a URL or domain
 *   name but does not match the link target.
 */
const isLinkMisleading = (link) => {
  const linkTextParts = [];

  // Reconstruct visible text, as we do not have much control over how links
  // from remote software look, and we can't rely on `innerText` because the
  // `invisible` class does not set `display` to `none`.
  const walk = (node) => {
    switch (node.nodeType) {
      case Node.TEXT_NODE:
        linkTextParts.push(node.textContent);
        break;
      case Node.ELEMENT_NODE: {
        if (node.classList.contains('invisible')) return;

        const children = node.childNodes;
        for (let i = 0; i < children.length; i++) {
          walk(children[i]);
        }
        break;
      }
    }
  };

  walk(link);

  const linkText = linkTextParts.join('');

  let targetURL;
  try {
    targetURL = new URL(link.href);
  } catch (e) {
    // The target cannot be parsed, so the text cannot be checked against it.
    // Text that looks like a URL is then treated as misleading.
    return linkRegex.test(linkText);
  }

  // Compare against the target as serialized by the URL parser (punycode
  // form for international domain names).
  if (textMatchesTarget(linkText, targetURL.origin, targetURL.host)) {
    return false;
  }

  // The link hasn't been recognized, maybe it features an international
  // domain name: retry with the hostname decoded to Unicode.
  const hostname = decodeIDNA(targetURL.hostname);
  const host = targetURL.host.replace(targetURL.hostname, hostname);
  const origin = targetURL.origin.replace(targetURL.host, host);

  if (textMatchesTarget(linkText, origin, host)) {
    return false;
  }

  // If the link text looks like an URL or auto-generated link, it is misleading
  return linkRegex.test(linkText);
};
|
||||||
|
|
||||||
export default class StatusContent extends React.PureComponent {
|
export default class StatusContent extends React.PureComponent {
|
||||||
|
|
||||||
static contextTypes = {
|
static contextTypes = {
|
||||||
|
@ -56,6 +118,34 @@ export default class StatusContent extends React.PureComponent {
|
||||||
} else {
|
} else {
|
||||||
link.setAttribute('title', link.href);
|
link.setAttribute('title', link.href);
|
||||||
link.classList.add('unhandled-link');
|
link.classList.add('unhandled-link');
|
||||||
|
|
||||||
|
if (isLinkMisleading(link)) {
|
||||||
|
while (link.firstChild) {
|
||||||
|
link.removeChild(link.firstChild);
|
||||||
|
}
|
||||||
|
|
||||||
|
const prefix = (link.href.match(/https?:\/\/(www\.)?/) || [''])[0];
|
||||||
|
const text = link.href.substr(prefix.length, 30);
|
||||||
|
const suffix = link.href.substr(prefix.length + 30);
|
||||||
|
const cutoff = !!suffix;
|
||||||
|
|
||||||
|
const prefixTag = document.createElement('span');
|
||||||
|
prefixTag.classList.add('invisible');
|
||||||
|
prefixTag.textContent = prefix;
|
||||||
|
link.appendChild(prefixTag);
|
||||||
|
|
||||||
|
const textTag = document.createElement('span');
|
||||||
|
if (cutoff) {
|
||||||
|
textTag.classList.add('ellipsis');
|
||||||
|
}
|
||||||
|
textTag.textContent = text;
|
||||||
|
link.appendChild(textTag);
|
||||||
|
|
||||||
|
const suffixTag = document.createElement('span');
|
||||||
|
suffixTag.classList.add('invisible');
|
||||||
|
suffixTag.textContent = suffix;
|
||||||
|
link.appendChild(suffixTag);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
link.setAttribute('target', '_blank');
|
link.setAttribute('target', '_blank');
|
||||||
|
|
|
@ -2,18 +2,9 @@ import React from 'react';
|
||||||
import PropTypes from 'prop-types';
|
import PropTypes from 'prop-types';
|
||||||
import Immutable from 'immutable';
|
import Immutable from 'immutable';
|
||||||
import ImmutablePropTypes from 'react-immutable-proptypes';
|
import ImmutablePropTypes from 'react-immutable-proptypes';
|
||||||
import punycode from 'punycode';
|
|
||||||
import classnames from 'classnames';
|
import classnames from 'classnames';
|
||||||
import Icon from 'mastodon/components/icon';
|
import Icon from 'mastodon/components/icon';
|
||||||
|
import { decode as decodeIDNA } from 'mastodon/utils/idna';
|
||||||
// ACE prefix that marks a punycode-encoded domain label.
const IDNA_PREFIX = 'xn--';

/**
 * Converts the punycode ("xn--") labels of a domain name to Unicode for
 * display. Other labels are returned untouched.
 *
 * A label that carries the prefix but is not valid punycode is kept as-is
 * rather than letting the decoder's RangeError propagate to the caller.
 *
 * @param {string} domain - Dot-separated domain name.
 * @returns {string} The domain with every decodable label in Unicode.
 */
const decodeIDNA = domain => {
  return domain
    .split('.')
    .map(part => {
      if (!part.startsWith(IDNA_PREFIX)) {
        return part;
      }

      try {
        return punycode.decode(part.slice(IDNA_PREFIX.length));
      } catch (e) {
        // punycode signals malformed input with RangeError; show the raw label.
        if (e instanceof RangeError) {
          return part;
        }
        throw e;
      }
    })
    .join('.');
};
|
|
||||||
|
|
||||||
const getHostname = url => {
|
const getHostname = url => {
|
||||||
const parser = document.createElement('a');
|
const parser = document.createElement('a');
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
import punycode from 'punycode';
|
||||||
|
|
||||||
|
// ACE prefix that marks a punycode-encoded domain label.
const IDNA_PREFIX = 'xn--';

/**
 * Converts the punycode ("xn--") labels of a domain name to Unicode for
 * display. Other labels are returned untouched.
 *
 * A label that carries the prefix but is not valid punycode is kept as-is
 * rather than letting the decoder's RangeError propagate to the caller.
 *
 * @param {string} domain - Dot-separated domain name.
 * @returns {string} The domain with every decodable label in Unicode.
 */
export const decode = domain => {
  return domain
    .split('.')
    .map(part => {
      if (!part.startsWith(IDNA_PREFIX)) {
        return part;
      }

      try {
        return punycode.decode(part.slice(IDNA_PREFIX.length));
      } catch (e) {
        // punycode signals malformed input with RangeError; show the raw label.
        if (e instanceof RangeError) {
          return part;
        }
        throw e;
      }
    })
    .join('.');
};
|
Loading…
Reference in New Issue