Improvements to search engine link handling
* Handling when Google results use Google's middleman (google.com/url?q=) * When handling middleman links, instead of replacing search result hrefs with updated links, we now add the updated link in a custom data-iwb-href attribute * Better handling of special characters in page titlessafari
parent
af752726ab
commit
5995b28efa
|
@ -2529,14 +2529,14 @@
|
|||
"origin": "Fear & Hunger Fandom Wiki",
|
||||
"origin_base_url": "fearandhunger.fandom.com",
|
||||
"origin_content_path": "/wiki/",
|
||||
"origin_main_page": "%22Fear_and_Hunger:_the_Tormentpedia%22_Wiki"
|
||||
"origin_main_page": "\"Fear_and_Hunger:_the_Tormentpedia\"_Wiki"
|
||||
}
|
||||
],
|
||||
"destination": "Fear & Hunger Wiki",
|
||||
"destination_base_url": "fearandhunger.wiki.gg",
|
||||
"destination_platform": "mediawiki",
|
||||
"destination_icon": "fearhungerwiki.png",
|
||||
"destination_main_page": "%22Fear_and_Hunger:_the_Tormentpedia%22_Wiki",
|
||||
"destination_main_page": "\"Fear_and_Hunger:_the_Tormentpedia\"_Wiki",
|
||||
"destination_search_path": "/index.php",
|
||||
"destination_content_path": "/wiki/",
|
||||
"tags": [
|
||||
|
|
|
@ -160,8 +160,15 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting, dest =
|
|||
|
||||
function commonFunctionGetOriginArticle(originURL, matchingSite) {
|
||||
let url = new URL('https://' + originURL.replace(/.*https?:\/\//, ''));
|
||||
const path = decodeURIComponent(decodeURIComponent(String(url.pathname.split('&')[0]).split(matchingSite['origin_content_path'])[1] || ''));
|
||||
return path;
|
||||
let article = String(url.pathname).split(matchingSite['origin_content_path'])[1] || '';
|
||||
|
||||
// If a Fextralife wiki, replace plus signs with spaces
|
||||
// When there are multiple plus signs together, this regex will only replace only the first
|
||||
if (originURL.includes('.wiki.fextralife.com')) {
|
||||
article = article.replace(/(?<!\+)\+/g, ' ');
|
||||
}
|
||||
|
||||
return article;
|
||||
}
|
||||
|
||||
function commonFunctionGetDestinationArticle(matchingSite, article) {
|
||||
|
@ -190,15 +197,10 @@ function commonFunctionGetNewURL(originURL, matchingSite) {
|
|||
|
||||
// Replace underscores with spaces as that performs better in search
|
||||
destinationArticle = destinationArticle.replaceAll('_', ' ');
|
||||
|
||||
// If a Fextralife wiki, replace plus signs with spaces
|
||||
// When there are multiple plus signs together, this regex will only replace only the first
|
||||
if (matchingSite['origin_base_url'].includes('.wiki.fextralife.com')) {
|
||||
destinationArticle = destinationArticle.replace(/(?<!\+)\+/g, ' ');
|
||||
}
|
||||
|
||||
// Encode article
|
||||
destinationArticle = encodeURIComponent(destinationArticle);
|
||||
// We decode + encode to ensure we don't double-encode,
|
||||
// in the event a string is already encoded
|
||||
destinationArticle = encodeURIComponent(decodeURIComponent(destinationArticle));
|
||||
|
||||
let searchParams = '';
|
||||
switch (matchingSite['destination_platform']) {
|
||||
|
|
|
@ -315,7 +315,7 @@ function displayRedirectBanner(newUrl, id, destinationName, destinationLanguage,
|
|||
}
|
||||
|
||||
function main() {
|
||||
extensionAPI.runtime.sendMessage({action: 'getStorage'}, (storage) => {
|
||||
extensionAPI.runtime.sendMessage({ action: 'getStorage' }, (storage) => {
|
||||
// Check if extension is on:
|
||||
if ((storage.power ?? 'on') === 'on') {
|
||||
// Check if there is a pathname, to ensure we're looking at an article
|
||||
|
@ -328,7 +328,7 @@ function main() {
|
|||
if (currentURL.hostname.match(breezewikiRegex) || (storage.breezewikiHost === 'CUSTOM' && storage.breezewikiCustomHost?.includes(currentURL.hostname))) {
|
||||
origin = String(currentURL.pathname).split('/')[1] + '.fandom.com/wiki/';
|
||||
if (currentURL.search.includes('?q=')) {
|
||||
origin = 'https://' + origin + currentURL.search.substring(3).split('&')[0];
|
||||
origin = 'https://' + origin + currentURL.search.substring(3);
|
||||
} else {
|
||||
origin = 'https://' + origin + currentURL.pathname.split('/')[3];
|
||||
}
|
||||
|
|
|
@ -154,11 +154,6 @@ function stringToId(string) {
|
|||
return string.replaceAll(' ', '-').replaceAll("'", '').replace(/\W/g, '').toLowerCase();
|
||||
}
|
||||
|
||||
// Function to escape string to use in regex
|
||||
function escapeRegex(string) {
|
||||
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
function removeSubstringIfAtEnd(str, sub) {
|
||||
if (sub && str.endsWith(sub)) {
|
||||
return str.slice(0, -sub.length);
|
||||
|
@ -193,16 +188,14 @@ function replaceSearchResults(searchResultContainer, site, link) {
|
|||
let indieResultText = document.createElement('span');
|
||||
if (originArticle && originArticle !== site['origin_main_page']) {
|
||||
let destinationArticleTitle = removeSubstringIfAtEnd(destinationArticle, site['destination_content_suffix']).replace(site['destination_content_prefix'], '').replaceAll('_', ' ');
|
||||
// If a Fextralife wiki, replace plus signs with spaces
|
||||
// When there are multiple plus signs together, this regex will only replace only the first
|
||||
if (link.includes('.wiki.fextralife.com')) {
|
||||
destinationArticleTitle = destinationArticleTitle.replace(/(?<!\+)\+/g, ' ');
|
||||
}
|
||||
|
||||
// Decode article
|
||||
destinationArticleTitle = decodeURIComponent(destinationArticleTitle);
|
||||
|
||||
if (site['language'] === 'EN' && link.match(/fandom\.com\/[a-z]{2}\/wiki\//)) {
|
||||
indieResultText.innerText = 'Look up "' + decodeURIComponent(destinationArticleTitle) + '" on ' + site.destination + ' (EN)';
|
||||
indieResultText.innerText = 'Look up "' + destinationArticleTitle + '" on ' + site.destination + ' (EN)';
|
||||
} else {
|
||||
indieResultText.innerText = 'Look up "' + decodeURIComponent(destinationArticleTitle) + '" on ' + site.destination;
|
||||
indieResultText.innerText = 'Look up "' + destinationArticleTitle + '" on ' + site.destination;
|
||||
}
|
||||
} else {
|
||||
if (site['language'] === 'EN' && link.match(/fandom\.com\/[a-z]{2}\/wiki\//)) {
|
||||
|
@ -480,16 +473,18 @@ async function filterSearchResult(matchingSite, searchResult, searchEngine, coun
|
|||
|
||||
if (searchResultContainer) {
|
||||
// If this page from Fandom is the same as a re-ordered page, filter it out
|
||||
let searchResultLink = searchResult.href;
|
||||
let searchResultLink = searchResult.getAttribute('data-iwb-href') || searchResult.href;
|
||||
let originArticle = commonFunctionGetOriginArticle(searchResultLink, matchingSite);
|
||||
let destinationArticle = commonFunctionGetDestinationArticle(matchingSite, originArticle);
|
||||
|
||||
if (reorderedHrefs.find((href) => href.match(
|
||||
// Match for destination URL with content path and article name
|
||||
new RegExp(
|
||||
`http(s)*://${matchingSite['destination_base_url']}${matchingSite['destination_content_path']}${encodeURIComponent(destinationArticle)}$`
|
||||
`http(s)?://${matchingSite['destination_base_url']}${matchingSite['destination_content_path']}${decodeURI(destinationArticle)}$`
|
||||
)
|
||||
))) {
|
||||
countFiltered += hideSearchResults(searchResultContainer, searchEngine, matchingSite, 'off');
|
||||
console.debug(`Indie Wiki Buddy has hidden a result matching ${searchResult.href} because we re-ordered an indie wiki result with a matching article`);
|
||||
console.debug(`Indie Wiki Buddy has hidden a result matching ${searchResultLink} because we re-ordered an indie wiki result with a matching article`);
|
||||
} else if (searchFilterSetting !== 'disabled') {
|
||||
if (searchFilterSetting === 'hide') {
|
||||
// Else, if the user has the preference set to hide search results, hide it indiscriminately
|
||||
|
@ -540,11 +535,11 @@ async function reorderSearchResults(searchResults, searchEngine, storage) {
|
|||
document.querySelector('#main div[data-hveid]');
|
||||
|
||||
// Get the first Fandom/Fextralife/Neoseeker result, if it exists
|
||||
const firstNonIndieResult = document.querySelector(`
|
||||
const nonIndieResults = document.querySelectorAll(`
|
||||
div[data-hveid] a[href*='.fandom.com/'][href*='/wiki/']:first-of-type:not([role='button']):not([target='_self']),
|
||||
div[data-hveid] a[href*='.wiki.fextralife.com/']:first-of-type:not([role='button']):not([target='_self']),
|
||||
div[data-hveid] a[href*='.neoseeker.com/wiki/']:first-of-type:not([role='button']):not([target='_self'])`);
|
||||
|
||||
const firstNonIndieResult = Array.from(nonIndieResults).filter((e) => !e.closest('g-section-with-header, div[aria-expanded], div[data-q], div[data-minw], div[data-num-cols], div[data-docid], div[data-lpage]'))[0];
|
||||
if (!resultsFirstChild || !firstNonIndieResult) return;
|
||||
|
||||
searchResults.some((result, i) => {
|
||||
|
@ -567,7 +562,7 @@ async function reorderSearchResults(searchResults, searchEngine, storage) {
|
|||
continue;
|
||||
}
|
||||
|
||||
const searchResultLink = searchResult.href || '';
|
||||
const searchResultLink = searchResult.getAttribute('data-iwb-href') || searchResult.href || '';
|
||||
|
||||
// Handle re-ordering of results to move destination results up the page
|
||||
let matchingDest = await commonFunctionFindMatchingSite(searchResultLink, crossLanguageSetting, true);
|
||||
|
@ -597,7 +592,7 @@ async function reorderSearchResults(searchResults, searchEngine, storage) {
|
|||
for (const searchResult of resultsToSort) {
|
||||
try {
|
||||
await reorderDestinationSearchResult(firstNonIndieResult, searchResult);
|
||||
reorderedHrefs.push(searchResult.href);
|
||||
reorderedHrefs.push(searchResult.getAttribute('data-iwb-href') || searchResult.href);
|
||||
} catch (e) {
|
||||
console.log('Indie Wiki Buddy failed to properly re-order search results with error: ' + e);
|
||||
}
|
||||
|
@ -614,7 +609,7 @@ async function filterSearchResults(searchResults, searchEngine, storage, reorder
|
|||
try {
|
||||
// Check that result isn't within another result
|
||||
if (!searchResult.closest('.iwb-detected') || !searchResult.closest('.iwb-detected')?.querySelector('.iwb-new-link')) {
|
||||
let searchResultLink = searchResult.href || '';
|
||||
let searchResultLink = searchResult.getAttribute('data-iwb-href') || searchResult.href || '';
|
||||
|
||||
if (!searchResultLink) {
|
||||
continue;
|
||||
|
@ -670,6 +665,22 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
|
|||
// Determine which search engine we're on
|
||||
switch (searchEngine) {
|
||||
case 'google':
|
||||
// Query Google results and rewrite HREFs when Google uses middleman links (i.e. google.com/url?q=)
|
||||
let searchResults = document.querySelectorAll("div[data-hveid] a:first-of-type:not([role='button']):not([target='_self'])");
|
||||
searchResults.forEach((searchResult) => {
|
||||
if (searchResult.href) {
|
||||
const link = new URL(searchResult.href);
|
||||
if (link.href.includes('https://www.google.com/url')) {
|
||||
try {
|
||||
const destinationLink = link.searchParams.get('url') || link.searchParams.get('q');
|
||||
searchResult.setAttribute('data-iwb-href', destinationLink);
|
||||
} catch (e) {
|
||||
console.log('Indie Wiki Buddy failed to parse Bing link with error: ', e);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Function to filter search results in Google
|
||||
function filterGoogle(reorderedHrefs) {
|
||||
let searchResults = document.querySelectorAll(`
|
||||
|
@ -682,7 +693,6 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
|
|||
|
||||
async function reorderGoogle() {
|
||||
let searchResults = document.querySelectorAll("div[data-hveid] a:first-of-type:not([role='button']):not([target='_self'])");
|
||||
|
||||
// Remove any matches that are not "standard" search results - this could've been done with :has() but limited browser support right now
|
||||
searchResults = Array.from(searchResults).filter((e) => !e.closest('g-section-with-header, div[aria-expanded], div[data-q], div[data-minw], div[data-num-cols], div[data-docid], div[data-lpage]'));
|
||||
|
||||
|
@ -715,7 +725,7 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
|
|||
try {
|
||||
let decodedLink = base64Decode(encodedLink.searchParams.get('u').replace(/^a1/, ''));
|
||||
if (decodedLink.includes('.fandom.com') || decodedLink.includes('.wiki.fextralife.com') || decodedLink.includes('.neoseeker.com/wiki/')) {
|
||||
searchResult.href = decodedLink;
|
||||
searchResult.setAttribute('data-iwb-href', decodedLink);
|
||||
searchResults.push(searchResult);
|
||||
}
|
||||
} catch (e) {
|
||||
|
@ -804,7 +814,7 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
|
|||
const extractedURL = decodeURIComponent(match && match[1]);
|
||||
|
||||
if (extractedURL.includes('.fandom.com') || extractedURL.includes('.wiki.fextralife.com') || extractedURL.includes('.neoseeker.com/wiki/')) {
|
||||
searchResult.href = extractedURL;
|
||||
searchResult.setAttribute('data-iwb-href', extractedURL);
|
||||
searchResults.push(searchResult);
|
||||
}
|
||||
} catch (e) {
|
||||
|
|
Loading…
Reference in New Issue