From 061fd66ee66afc8a5c7923ff7648f51a6da3fe7c Mon Sep 17 00:00:00 2001
From: Renaud Chaput <renchap@gmail.com>
Date: Mon, 21 Aug 2023 19:39:01 +0200
Subject: [PATCH] Remove hashtags from the last line of a status if it only
 contains hashtags (#26499)

---
 .../components/__tests__/hashtag_bar.tsx      | 184 +++++++++++++++
 .../mastodon/components/hashtag_bar.jsx       |  50 ----
 .../mastodon/components/hashtag_bar.tsx       | 222 ++++++++++++++++++
 app/javascript/mastodon/components/status.jsx |   7 +-
 .../mastodon/components/status_content.jsx    |  14 +-
 .../status/components/detailed_status.jsx     |   7 +-
 6 files changed, 428 insertions(+), 56 deletions(-)
 create mode 100644 app/javascript/mastodon/components/__tests__/hashtag_bar.tsx
 delete mode 100644 app/javascript/mastodon/components/hashtag_bar.jsx
 create mode 100644 app/javascript/mastodon/components/hashtag_bar.tsx

diff --git a/app/javascript/mastodon/components/__tests__/hashtag_bar.tsx b/app/javascript/mastodon/components/__tests__/hashtag_bar.tsx
new file mode 100644
index 0000000000..c7db485d08
--- /dev/null
+++ b/app/javascript/mastodon/components/__tests__/hashtag_bar.tsx
@@ -0,0 +1,184 @@
+import { fromJS } from 'immutable';
+
+import type { StatusLike } from '../hashtag_bar';
+import { computeHashtagBarForStatus } from '../hashtag_bar';
+
+function createStatus(
+  content: string,
+  hashtags: string[],
+  hasMedia = false,
+  spoilerText?: string,
+) {
+  return fromJS({
+    tags: hashtags.map((name) => ({ name })),
+    contentHtml: content,
+    media_attachments: hasMedia ? ['fakeMedia'] : [],
+    spoiler_text: spoilerText,
+  }) as unknown as StatusLike; // need to force the type here, as it is not properly defined
+}
+
+describe('computeHashtagBarForStatus', () => {
+  it('does nothing when there are no tags', () => {
+    const status = createStatus('<p>Simple text</p>', []);
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Simple text</p>"`,
+    );
+  });
+
+  it('displays out of band hashtags in the bar', () => {
+    const status = createStatus(
+      '<p>Simple text <a href="test">#hashtag</a></p>',
+      ['hashtag', 'test'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual(['test']);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Simple text <a href="test">#hashtag</a></p>"`,
+    );
+  });
+
+  it('extract tags from the last line', () => {
+    const status = createStatus(
+      '<p>Simple text</p><p><a href="test">#hashtag</a></p>',
+      ['hashtag'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual(['hashtag']);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Simple text</p>"`,
+    );
+  });
+
+  it('does not include tags from content', () => {
+    const status = createStatus(
+      '<p>Simple text with a <a href="test">#hashtag</a></p><p><a href="test">#hashtag</a></p>',
+      ['hashtag'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Simple text with a <a href="test">#hashtag</a></p>"`,
+    );
+  });
+
+  it('works with one line status and hashtags', () => {
+    const status = createStatus(
+      '<p><a href="test">#test</a>. And another <a href="test">#hashtag</a></p>',
+      ['hashtag', 'test'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p><a href="test">#test</a>. And another <a href="test">#hashtag</a></p>"`,
+    );
+  });
+
+  it('de-duplicate accentuated characters with case differences', () => {
+    const status = createStatus(
+      '<p>Text</p><p><a href="test">#éaa</a> <a href="test">#Éaa</a></p>',
+      ['éaa'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual(['Éaa']);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Text</p>"`,
+    );
+  });
+
+  it('does not display in bar a hashtag in content with a case difference', () => {
+    const status = createStatus(
+      '<p>Text <a href="test">#Éaa</a></p><p><a href="test">#éaa</a></p>',
+      ['éaa'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>Text <a href="test">#Éaa</a></p>"`,
+    );
+  });
+
+  it('does not modify a status with a line of hashtags only', () => {
+    const status = createStatus(
+      '<p><a href="test">#test</a>  <a href="test">#hashtag</a></p>',
+      ['test', 'hashtag'],
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p><a href="test">#test</a>  <a href="test">#hashtag</a></p>"`,
+    );
+  });
+
+  it('puts the hashtags in the bar if a status content has hashtags in the only line and has a media', () => {
+    const status = createStatus(
+      '<p>This is my content! <a href="test">#hashtag</a></p>',
+      ['hashtag'],
+      true,
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p>This is my content! <a href="test">#hashtag</a></p>"`,
+    );
+  });
+
+  it('puts the hashtags in the bar if a status content is only hashtags and has a media', () => {
+    const status = createStatus(
+      '<p><a href="test">#test</a>  <a href="test">#hashtag</a></p>',
+      ['test', 'hashtag'],
+      true,
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual(['test', 'hashtag']);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(`""`);
+  });
+
+  it('does not use the hashtag bar if the status content is only hashtags, has a CW and a media', () => {
+    const status = createStatus(
+      '<p><a href="test">#test</a>  <a href="test">#hashtag</a></p>',
+      ['test', 'hashtag'],
+      true,
+      'My CW text',
+    );
+
+    const { hashtagsInBar, statusContentProps } =
+      computeHashtagBarForStatus(status);
+
+    expect(hashtagsInBar).toEqual([]);
+    expect(statusContentProps.statusContent).toMatchInlineSnapshot(
+      `"<p><a href="test">#test</a>  <a href="test">#hashtag</a></p>"`,
+    );
+  });
+});
diff --git a/app/javascript/mastodon/components/hashtag_bar.jsx b/app/javascript/mastodon/components/hashtag_bar.jsx
deleted file mode 100644
index 3c7e24228d..0000000000
--- a/app/javascript/mastodon/components/hashtag_bar.jsx
+++ /dev/null
@@ -1,50 +0,0 @@
-import PropTypes from 'prop-types';
-import { useMemo, useState, useCallback } from 'react';
-
-import { FormattedMessage } from 'react-intl';
-
-import { Link } from 'react-router-dom';
-
-import ImmutablePropTypes from 'react-immutable-proptypes';
-
-const domParser = new DOMParser();
-
-// About two lines on desktop
-const VISIBLE_HASHTAGS = 7;
-
-export const HashtagBar = ({ hashtags, text }) => {
-  const renderedHashtags = useMemo(() => {
-    const body = domParser.parseFromString(text, 'text/html').documentElement;
-    return [].filter.call(body.querySelectorAll('a[href]'), link => link.textContent[0] === '#' || (link.previousSibling?.textContent?.[link.previousSibling.textContent.length - 1] === '#')).map(node => node.textContent);
-  }, [text]);
-
-  const invisibleHashtags = useMemo(() => (
-    hashtags.filter(hashtag => !renderedHashtags.some(textContent => textContent.localeCompare(`#${hashtag.get('name')}`, undefined, { sensitivity: 'accent' }) === 0 || textContent.localeCompare(hashtag.get('name'), undefined, { sensitivity: 'accent' }) === 0))
-  ), [hashtags, renderedHashtags]);
-
-  const [expanded, setExpanded] = useState(false);
-  const handleClick = useCallback(() => setExpanded(true), []);
-
-  if (invisibleHashtags.isEmpty()) {
-    return null;
-  }
-
-  const revealedHashtags = expanded ? invisibleHashtags : invisibleHashtags.take(VISIBLE_HASHTAGS);
-
-  return (
-    <div className='hashtag-bar'>
-      {revealedHashtags.map(hashtag => (
-        <Link key={hashtag.get('name')} to={`/tags/${hashtag.get('name')}`}>
-          #{hashtag.get('name')}
-        </Link>
-      ))}
-
-      {!expanded && invisibleHashtags.size > VISIBLE_HASHTAGS && <button className='link-button' onClick={handleClick}><FormattedMessage id='hashtags.and_other' defaultMessage='…and {count, plural, other {# more}}' values={{ count: invisibleHashtags.size - VISIBLE_HASHTAGS }} /></button>}
-    </div>
-  );
-};
-
-HashtagBar.propTypes = {
-  hashtags: ImmutablePropTypes.list,
-  text: PropTypes.string,
-};
diff --git a/app/javascript/mastodon/components/hashtag_bar.tsx b/app/javascript/mastodon/components/hashtag_bar.tsx
new file mode 100644
index 0000000000..8781c26630
--- /dev/null
+++ b/app/javascript/mastodon/components/hashtag_bar.tsx
@@ -0,0 +1,222 @@
+import { useState, useCallback } from 'react';
+
+import { FormattedMessage } from 'react-intl';
+
+import { Link } from 'react-router-dom';
+
+import type { List, Record } from 'immutable';
+
+import { groupBy, minBy } from 'lodash';
+
+import { getStatusContent } from './status_content';
+
+// About two lines on desktop
+const VISIBLE_HASHTAGS = 7;
+
+// Those types are not correct, they need to be replaced once this part of the state is typed
+export type TagLike = Record<{ name: string }>;
+export type StatusLike = Record<{
+  tags: List<TagLike>;
+  contentHTML: string;
+  media_attachments: List<unknown>;
+  spoiler_text?: string;
+}>;
+
+function normalizeHashtag(hashtag: string) {
+  if (hashtag && hashtag.startsWith('#')) return hashtag.slice(1);
+  else return hashtag;
+}
+
+function isNodeLinkHashtag(element: Node): element is HTMLLinkElement {
+  return (
+    element instanceof HTMLAnchorElement &&
+    // it may be a <a> starting with a hashtag
+    (element.textContent?.[0] === '#' ||
+      // or a #<a>
+      element.previousSibling?.textContent?.[
+        element.previousSibling.textContent.length - 1
+      ] === '#')
+  );
+}
+
+/**
+ * Removes duplicates from an hashtag list, case-insensitive, keeping only the best one
+ * "Best" here is defined by the one with the more casing difference (ie, the most camel-cased one)
+ * @param hashtags The list of hashtags
+ * @returns The input hashtags, but with only 1 occurence of each (case-insensitive)
+ */
+function uniqueHashtagsWithCaseHandling(hashtags: string[]) {
+  const groups = groupBy(hashtags, (tag) =>
+    tag.normalize('NFKD').toLowerCase(),
+  );
+
+  return Object.values(groups).map((tags) => {
+    if (tags.length === 1) return tags[0];
+
+    // The best match is the one where we have the less difference between upper and lower case letter count
+    const best = minBy(tags, (tag) => {
+      const upperCase = Array.from(tag).reduce(
+        (acc, char) => (acc += char.toUpperCase() === char ? 1 : 0),
+        0,
+      );
+
+      const lowerCase = tag.length - upperCase;
+
+      return Math.abs(lowerCase - upperCase);
+    });
+
+    return best ?? tags[0];
+  });
+}
+
+// Create the collator once, this is much more efficient
+const collator = new Intl.Collator(undefined, { sensitivity: 'accent' });
+function localeAwareInclude(collection: string[], value: string) {
+  return collection.find((item) => collator.compare(item, value) === 0);
+}
+
+// We use an intermediate function here to make it easier to test
+export function computeHashtagBarForStatus(status: StatusLike): {
+  statusContentProps: { statusContent: string };
+  hashtagsInBar: string[];
+} {
+  let statusContent = getStatusContent(status);
+
+  const tagNames = status
+    .get('tags')
+    .map((tag) => tag.get('name'))
+    .toJS();
+
+  // this is returned if we stop the processing early, it does not change what is displayed
+  const defaultResult = {
+    statusContentProps: { statusContent },
+    hashtagsInBar: [],
+  };
+
+  // return early if this status does not have any tags
+  if (tagNames.length === 0) return defaultResult;
+
+  const template = document.createElement('template');
+  template.innerHTML = statusContent.trim();
+
+  const lastChild = template.content.lastChild;
+
+  if (!lastChild) return defaultResult;
+
+  template.content.removeChild(lastChild);
+  const contentWithoutLastLine = template;
+
+  // First, try to parse
+  const contentHashtags = Array.from(
+    contentWithoutLastLine.content.querySelectorAll<HTMLLinkElement>('a[href]'),
+  ).reduce<string[]>((result, link) => {
+    if (isNodeLinkHashtag(link)) {
+      if (link.textContent) result.push(normalizeHashtag(link.textContent));
+    }
+    return result;
+  }, []);
+
+  // Now we parse the last line, and try to see if it only contains hashtags
+  const lastLineHashtags: string[] = [];
+  // try to see if the last line is only hashtags
+  let onlyHashtags = true;
+
+  Array.from(lastChild.childNodes).forEach((node) => {
+    if (isNodeLinkHashtag(node) && node.textContent) {
+      const normalized = normalizeHashtag(node.textContent);
+
+      if (!localeAwareInclude(tagNames, normalized)) {
+        // stop here, this is not a real hashtag, so consider it as text
+        onlyHashtags = false;
+        return;
+      }
+
+      if (!localeAwareInclude(contentHashtags, normalized))
+        // only add it if it does not appear in the rest of the content
+        lastLineHashtags.push(normalized);
+    } else if (node.nodeType !== Node.TEXT_NODE || node.nodeValue?.trim()) {
+      // not a space
+      onlyHashtags = false;
+    }
+  });
+
+  const hashtagsInBar = tagNames.filter(
+    (tag) =>
+      // the tag does not appear at all in the status content, it is an out-of-band tag
+      !localeAwareInclude(contentHashtags, tag) &&
+      !localeAwareInclude(lastLineHashtags, tag),
+  );
+
+  const isOnlyOneLine = contentWithoutLastLine.content.childElementCount === 0;
+  const hasMedia = status.get('media_attachments').size > 0;
+  const hasSpoiler = !!status.get('spoiler_text');
+
+  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- due to https://github.com/microsoft/TypeScript/issues/9998
+  if (onlyHashtags && ((hasMedia && !hasSpoiler) || !isOnlyOneLine)) {
+    // if the last line only contains hashtags, and we either:
+    // - have other content in the status
+    // - dont have other content, but a media and no CW. If it has a CW, then we do not remove the content to avoid having an empty content behind the CW button
+    statusContent = contentWithoutLastLine.innerHTML;
+    // and add the tags to the bar
+    hashtagsInBar.push(...lastLineHashtags);
+  }
+
+  return {
+    statusContentProps: { statusContent },
+    hashtagsInBar: uniqueHashtagsWithCaseHandling(hashtagsInBar),
+  };
+}
+
+/**
+ *  This function will process a status to, at the same time (avoiding parsing it twice):
+ * - build the HashtagBar for this status
+ * - remove the last-line hashtags from the status content
+ * @param status The status to process
+ * @returns Props to be passed to the <StatusContent> component, and the hashtagBar to render
+ */
+export function getHashtagBarForStatus(status: StatusLike) {
+  const { statusContentProps, hashtagsInBar } =
+    computeHashtagBarForStatus(status);
+
+  return {
+    statusContentProps,
+    hashtagBar: <HashtagBar hashtags={hashtagsInBar} />,
+  };
+}
+
+const HashtagBar: React.FC<{
+  hashtags: string[];
+}> = ({ hashtags }) => {
+  const [expanded, setExpanded] = useState(false);
+  const handleClick = useCallback(() => {
+    setExpanded(true);
+  }, []);
+
+  if (hashtags.length === 0) {
+    return null;
+  }
+
+  const revealedHashtags = expanded
+    ? hashtags
+    : hashtags.slice(0, VISIBLE_HASHTAGS - 1);
+
+  return (
+    <div className='hashtag-bar'>
+      {revealedHashtags.map((hashtag) => (
+        <Link key={hashtag} to={`/tags/${hashtag}`}>
+          #{hashtag}
+        </Link>
+      ))}
+
+      {!expanded && hashtags.length > VISIBLE_HASHTAGS && (
+        <button className='link-button' onClick={handleClick}>
+          <FormattedMessage
+            id='hashtags.and_other'
+            defaultMessage='…and {count, plural, other {# more}}'
+            values={{ count: hashtags.length - VISIBLE_HASHTAGS }}
+          />
+        </button>
+      )}
+    </div>
+  );
+};
diff --git a/app/javascript/mastodon/components/status.jsx b/app/javascript/mastodon/components/status.jsx
index 7c34684d71..45a2106dba 100644
--- a/app/javascript/mastodon/components/status.jsx
+++ b/app/javascript/mastodon/components/status.jsx
@@ -22,7 +22,7 @@ import { displayMedia } from '../initial_state';
 import { Avatar } from './avatar';
 import { AvatarOverlay } from './avatar_overlay';
 import { DisplayName } from './display_name';
-import { HashtagBar } from './hashtag_bar';
+import { getHashtagBarForStatus } from './hashtag_bar';
 import { RelativeTimestamp } from './relative_timestamp';
 import StatusActionBar from './status_action_bar';
 import StatusContent from './status_content';
@@ -545,6 +545,8 @@ class Status extends ImmutablePureComponent {
 
     const visibilityIcon = visibilityIconInfo[status.get('visibility')];
 
+    const {statusContentProps, hashtagBar} = getHashtagBarForStatus(status);
+
     return (
       <HotKeys handlers={handlers}>
         <div className={classNames('status__wrapper', `status__wrapper-${status.get('visibility')}`, { 'status__wrapper-reply': !!status.get('in_reply_to_id'), unread, focusable: !this.props.muted })} tabIndex={this.props.muted ? null : 0} data-featured={featured ? 'true' : null} aria-label={textForScreenReader(intl, status, rebloggedByText)} ref={this.handleRef}>
@@ -577,11 +579,12 @@ class Status extends ImmutablePureComponent {
               onTranslate={this.handleTranslate}
               collapsible
               onCollapsedToggle={this.handleCollapsedToggle}
+              {...statusContentProps}
             />
 
             {media}
 
-            <HashtagBar hashtags={status.get('tags')} text={status.get('content')} />
+            {hashtagBar}
 
             <StatusActionBar scrollKey={scrollKey} status={status} account={account} onFilter={matchedFilters ? this.handleFilterClick : null} {...other} />
           </div>
diff --git a/app/javascript/mastodon/components/status_content.jsx b/app/javascript/mastodon/components/status_content.jsx
index 84a698810f..d3bbc1ba02 100644
--- a/app/javascript/mastodon/components/status_content.jsx
+++ b/app/javascript/mastodon/components/status_content.jsx
@@ -15,6 +15,15 @@ import { autoPlayGif, languages as preloadedLanguages } from 'mastodon/initial_s
 
 const MAX_HEIGHT = 706; // 22px * 32 (+ 2px padding at the top)
 
+/**
+ *
+ * @param {any} status
+ * @returns {string}
+ */
+export function getStatusContent(status) {
+  return status.getIn(['translation', 'contentHtml']) || status.get('contentHtml');
+}
+
 class TranslateButton extends PureComponent {
 
   static propTypes = {
@@ -65,6 +74,7 @@ class StatusContent extends PureComponent {
 
   static propTypes = {
     status: ImmutablePropTypes.map.isRequired,
+    statusContent: PropTypes.string,
     expanded: PropTypes.bool,
     onExpandedToggle: PropTypes.func,
     onTranslate: PropTypes.func,
@@ -225,7 +235,7 @@ class StatusContent extends PureComponent {
   };
 
   render () {
-    const { status, intl } = this.props;
+    const { status, intl, statusContent } = this.props;
 
     const hidden = this.props.onExpandedToggle ? !this.props.expanded : this.state.hidden;
     const renderReadMore = this.props.onClick && status.get('collapsed');
@@ -233,7 +243,7 @@ class StatusContent extends PureComponent {
     const targetLanguages = this.props.languages?.get(status.get('language') || 'und');
     const renderTranslate = this.props.onTranslate && this.context.identity.signedIn && ['public', 'unlisted'].includes(status.get('visibility')) && status.get('search_index').trim().length > 0 && targetLanguages?.includes(contentLocale);
 
-    const content = { __html: status.getIn(['translation', 'contentHtml']) || status.get('contentHtml') };
+    const content = { __html: statusContent ?? getStatusContent(status) };
     const spoilerContent = { __html: status.getIn(['translation', 'spoilerHtml']) || status.get('spoilerHtml') };
     const language = status.getIn(['translation', 'language']) || status.get('language');
     const classNames = classnames('status__content', {
diff --git a/app/javascript/mastodon/features/status/components/detailed_status.jsx b/app/javascript/mastodon/features/status/components/detailed_status.jsx
index c1815b9167..401550e49f 100644
--- a/app/javascript/mastodon/features/status/components/detailed_status.jsx
+++ b/app/javascript/mastodon/features/status/components/detailed_status.jsx
@@ -10,7 +10,7 @@ import ImmutablePureComponent from 'react-immutable-pure-component';
 
 import { AnimatedNumber } from 'mastodon/components/animated_number';
 import EditedTimestamp from 'mastodon/components/edited_timestamp';
-import { HashtagBar } from 'mastodon/components/hashtag_bar';
+import { getHashtagBarForStatus } from 'mastodon/components/hashtag_bar';
 import { Icon }  from 'mastodon/components/icon';
 import PictureInPicturePlaceholder from 'mastodon/components/picture_in_picture_placeholder';
 
@@ -292,6 +292,8 @@ class DetailedStatus extends ImmutablePureComponent {
       );
     }
 
+    const {statusContentProps, hashtagBar} = getHashtagBarForStatus(status);
+
     return (
       <div style={outerStyle}>
         <div ref={this.setRef} className={classNames('detailed-status', { compact })}>
@@ -311,11 +313,12 @@ class DetailedStatus extends ImmutablePureComponent {
             expanded={!status.get('hidden')}
             onExpandedToggle={this.handleExpandedToggle}
             onTranslate={this.handleTranslate}
+            {...statusContentProps}
           />
 
           {media}
 
-          <HashtagBar hashtags={status.get('tags')} text={status.get('content')} />
+          {hashtagBar}
 
           <div className='detailed-status__meta'>
             <a className='detailed-status__datetime' href={`/@${status.getIn(['account', 'acct'])}/${status.get('id')}`} target='_blank' rel='noopener noreferrer'>