Improvements to Google search results re-ordering

* Instead of moving indie results to the top of Google search results, we now move indie results above the first non-indie (Fandom / Fextralife / Neoseeker) result. If no non-indie result appears, re-ordering doesn't occur. This is to avoid moving less relevant results to the top, particularly for searches for generic terms.
* Improved Google filtering to account for cases where Google wraps a result's URL in its own intermediary (redirect) link
pull/700/head
Kevin Payravi 2024-05-29 22:24:58 +02:00
parent 47148f5f34
commit 67b77a234c
5 changed files with 137 additions and 126 deletions

View File

@ -110,10 +110,9 @@
</li> </li>
<br /> <br />
<li> <li>
<b><img src="../../images/toggle-move-up.png" height="12" alt="" /> On Google, move indie wiki results to the top of <b><img src="../../images/toggle-move-up.png" height="12" alt="" /> On Google, move indie wiki results above non-indie results</b>
the page</b>
<br /> <br />
By default, Indie Wiki Buddy will re-order the results page on Google to move indie wiki results to the top, By default, Indie Wiki Buddy will re-order the results page on Google to move indie wiki results above the first non-indie (Fandom / Fextralife / Neoseeker) result,
if they are further down the first page of results. This allows you to find the pages you're looking for on indie wikis if they are further down the first page of results. This allows you to find the pages you're looking for on indie wikis
quicker. quicker.
<br /><br /> <br /><br />

View File

@ -175,7 +175,7 @@
<label> <label>
<input id="reorderResultsCheckbox" type="checkbox" /> <input id="reorderResultsCheckbox" type="checkbox" />
<img src="../../images/toggle-move-up.png" height="12" alt="" /> <img src="../../images/toggle-move-up.png" height="12" alt="" />
On Google, move indie wiki results to the top of the page On Google, move indie wiki results above non-indie results
</label> </label>
</div> </div>
</fieldset> </fieldset>

View File

@ -166,7 +166,7 @@
<label> <label>
<input id="reorderResultsCheckbox" type="checkbox" /> <input id="reorderResultsCheckbox" type="checkbox" />
<img src="../../images/toggle-move-up.png" height="12" alt="" /> <img src="../../images/toggle-move-up.png" height="12" alt="" />
On Google, move indie wiki results to the top of the page On Google, move indie wiki results above non-indie results
</label> </label>
</div> </div>
</fieldset> </fieldset>

View File

@ -79,32 +79,32 @@ async function populateSiteDataByOrigin() {
let promises = []; let promises = [];
for (let i = 0; i < LANGS.length; i++) { for (let i = 0; i < LANGS.length; i++) {
promises.push(fetch(extensionAPI.runtime.getURL('data/sites' + LANGS[i] + '.json')) promises.push(fetch(extensionAPI.runtime.getURL('data/sites' + LANGS[i] + '.json'))
.then((resp) => resp.json()) .then((resp) => resp.json())
.then((jsonData) => { .then((jsonData) => {
jsonData.forEach((site) => { jsonData.forEach((site) => {
site.origins.forEach((origin) => { site.origins.forEach((origin) => {
sites.push({ sites.push({
"id": site.id, "id": site.id,
"origin": origin.origin, "origin": origin.origin,
"origin_base_url": origin.origin_base_url, "origin_base_url": origin.origin_base_url,
"origin_content_path": origin.origin_content_path, "origin_content_path": origin.origin_content_path,
"origin_main_page": origin.origin_main_page, "origin_main_page": origin.origin_main_page,
"destination": site.destination, "destination": site.destination,
"destination_base_url": site.destination_base_url, "destination_base_url": site.destination_base_url,
"destination_search_path": site.destination_search_path, "destination_search_path": site.destination_search_path,
"destination_content_prefix": origin.destination_content_prefix || site.destination_content_prefix || "", "destination_content_prefix": origin.destination_content_prefix || site.destination_content_prefix || "",
// /w/index.php?title= is the default path for a new MediaWiki install, change as accordingly in config JSON files // /w/index.php?title= is the default path for a new MediaWiki install, change as accordingly in config JSON files
"destination_content_path": site.destination_content_path || "/w/index.php?title=", "destination_content_path": site.destination_content_path || "/w/index.php?title=",
"destination_content_suffix": origin.destination_content_suffix || site.destination_content_suffix || "", "destination_content_suffix": origin.destination_content_suffix || site.destination_content_suffix || "",
"destination_platform": site.destination_platform, "destination_platform": site.destination_platform,
"destination_icon": site.destination_icon, "destination_icon": site.destination_icon,
"destination_main_page": site.destination_main_page, "destination_main_page": site.destination_main_page,
"tags": site.tags || [], "tags": site.tags || [],
"language": LANGS[i] "language": LANGS[i]
})
}) })
}); })
})); });
}));
} }
await Promise.all(promises); await Promise.all(promises);
@ -136,7 +136,7 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting, dest =
matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el[base_url_key])); matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el[base_url_key]));
} else { } else {
matchingSites = sites.filter(el => matchingSites = sites.filter(el =>
site.replace(/.*https?:\/\//, '').startsWith(dest ? el[base_url_key] : (el.origin_base_url + el.origin_content_path)) site.replace(/.*https?:\/\//, '').startsWith(dest ? el[base_url_key] : (el.origin_base_url + el.origin_content_path))
|| site.replace(/.*https?:\/\//, '').replace(/\/$/, '') === el[base_url_key] || site.replace(/.*https?:\/\//, '').replace(/\/$/, '') === el[base_url_key]
); );
} }
@ -159,8 +159,9 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting, dest =
} }
function commonFunctionGetOriginArticle(originURL, matchingSite) { function commonFunctionGetOriginArticle(originURL, matchingSite) {
let url = new URL(originURL); let url = new URL('https://' + originURL.replace(/.*https?:\/\//, ''));
return decodeURIComponent(String(url.pathname).split(matchingSite['origin_content_path'])[1] || ''); const path = decodeURIComponent(decodeURIComponent(String(url.pathname.split('&')[0]).split(matchingSite['origin_content_path'])[1] || ''));
return path;
} }
function commonFunctionGetDestinationArticle(matchingSite, article) { function commonFunctionGetDestinationArticle(matchingSite, article) {

View File

@ -161,7 +161,7 @@ function escapeRegex(string) {
function removeSubstringIfAtEnd(str, sub) { function removeSubstringIfAtEnd(str, sub) {
if (sub && str.endsWith(sub)) { if (sub && str.endsWith(sub)) {
return str.slice(0, -sub.length); return str.slice(0, -sub.length);
} }
return str; return str;
} }
@ -393,38 +393,7 @@ function findClosestElement(target, elements) {
return closestElement; return closestElement;
} }
async function filterSearchResult(matchingSite, searchResult, searchEngine, countFiltered, storage, reorderedHrefs) { function getSearchContainer(searchEngine, searchResult) {
// Get user's settings for the wiki
let id = matchingSite['id'];
let searchFilterSetting = 'replace';
let searchEngineSettings = await commonFunctionDecompressJSON(storage.searchEngineSettings || {});
if (searchEngineSettings[id]) {
searchFilterSetting = searchEngineSettings[id];
} else if (storage.defaultSearchAction) {
searchFilterSetting = storage.defaultSearchAction;
}
// Output stylesheet if not already done
if (!document.querySelector('.iwb-styles')) {
const headElement = document.querySelector('head');
if (headElement) {
insertCSS();
} else {
// If head element doesn't exist, wait for it via MutationObserver
const docObserver = new MutationObserver((mutations, mutationInstance) => {
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
mutationInstance.disconnect();
}
});
docObserver.observe(document, {
childList: true,
subtree: true
});
}
}
let searchResultContainer = null; let searchResultContainer = null;
switch (searchEngine) { switch (searchEngine) {
@ -472,11 +441,48 @@ async function filterSearchResult(matchingSite, searchResult, searchEngine, coun
default: default:
} }
return searchResultContainer;
}
async function filterSearchResult(matchingSite, searchResult, searchEngine, countFiltered, storage, reorderedHrefs) {
// Get user's settings for the wiki
let id = matchingSite['id'];
let searchFilterSetting = 'replace';
let searchEngineSettings = await commonFunctionDecompressJSON(storage.searchEngineSettings || {});
if (searchEngineSettings[id]) {
searchFilterSetting = searchEngineSettings[id];
} else if (storage.defaultSearchAction) {
searchFilterSetting = storage.defaultSearchAction;
}
// Output stylesheet if not already done
if (!document.querySelector('.iwb-styles')) {
const headElement = document.querySelector('head');
if (headElement) {
insertCSS();
} else {
// If head element doesn't exist, wait for it via MutationObserver
const docObserver = new MutationObserver((mutations, mutationInstance) => {
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
mutationInstance.disconnect();
}
});
docObserver.observe(document, {
childList: true,
subtree: true
});
}
}
const searchResultContainer = getSearchContainer(searchEngine, searchResult);
if (searchResultContainer) { if (searchResultContainer) {
// If this page from Fandom is the same as a re-ordered page, filter it out // If this page from Fandom is the same as a re-ordered page, filter it out
let originArticle = commonFunctionGetOriginArticle(searchResult.href, matchingSite); let searchResultLink = searchResult.href;
let originArticle = commonFunctionGetOriginArticle(searchResultLink, matchingSite);
let destinationArticle = commonFunctionGetDestinationArticle(matchingSite, originArticle); let destinationArticle = commonFunctionGetDestinationArticle(matchingSite, originArticle);
if (reorderedHrefs.find((href) => href.match( if (reorderedHrefs.find((href) => href.match(
new RegExp( new RegExp(
`http(s)*://${matchingSite['destination_base_url']}${matchingSite['destination_content_path']}${encodeURIComponent(destinationArticle)}$` `http(s)*://${matchingSite['destination_base_url']}${matchingSite['destination_content_path']}${encodeURIComponent(destinationArticle)}$`
@ -497,32 +503,20 @@ async function filterSearchResult(matchingSite, searchResult, searchEngine, coun
return countFiltered; return countFiltered;
} }
async function reorderDestinationSearchResult(resultsFirstChild, searchResult) { async function reorderDestinationSearchResult(firstNonIndieResult, searchResult) {
// Find containing element for the search result // Find containing element for non-indie result
const closestJsController = searchResult.closest('div[jscontroller]'); const nonIndieSearchResultContainer = getSearchContainer('google', firstNonIndieResult);
const closestDataDiv = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
searchResultContainer = findClosestElement(searchResult, [closestJsController, closestDataDiv]);
// Find the element holding the search results, // Find containing element for the indie search result
// to prepend the destination wiki result const indieSearchResultContainer = getSearchContainer('google', searchResult);
let searchResultsList = document.querySelector('#search') || document.querySelector('#topstuff');
if (!searchResultsList) {
if (document.querySelector('#main')) {
var el = document.querySelector('#main');
if (el.querySelector('#main > div[data-hveid]')) {
searchResultsList = el.querySelector('div[data-hveid]');
} else {
searchResultsList = el.querySelector('div div[data-hveid]').parentElement;
}
};
}
if (!resultsFirstChild || !searchResultContainer || searchResultContainer.classList.contains('iwb-reordered')) { if (!indieSearchResultContainer || indieSearchResultContainer.classList.contains('iwb-reordered')) {
return; return;
} }
searchResultContainer.classList.add('iwb-reordered'); indieSearchResultContainer.classList.add('iwb-reordered');
searchResultsList.prepend(searchResultContainer); // Prepend search results to first Fandom/Fextra/Neoseeker result
nonIndieSearchResultContainer.parentNode.prepend(indieSearchResultContainer);
} }
async function reorderSearchResults(searchResults, searchEngine, storage) { async function reorderSearchResults(searchResults, searchEngine, storage) {
@ -538,14 +532,31 @@ async function reorderSearchResults(searchResults, searchEngine, storage) {
if (searchEngine !== 'google') return; if (searchEngine !== 'google') return;
// Get the first element in the results container // Get the first element in the results container
let resultsFirstChild = document.querySelector('#rso div[data-hveid].g') || const resultsFirstChild = document.querySelector('#rso div[data-hveid].g') ||
document.querySelector('#main div[data-hveid].g') || document.querySelector('#main div[data-hveid].g') ||
document.querySelector('#rso div[data-hveid] div[data-dsrp]') || document.querySelector('#rso div[data-hveid] div[data-dsrp]') ||
document.querySelector('#main div[data-hveid] div[data-dsrp]') || document.querySelector('#main div[data-hveid] div[data-dsrp]') ||
document.querySelector('#rso div[data-hveid]') || document.querySelector('#rso div[data-hveid]') ||
document.querySelector('#main div[data-hveid]'); document.querySelector('#main div[data-hveid]');
if (!resultsFirstChild) return; // Get the first Fandom/Fextralife/Neoseeker result, if it exists
const firstNonIndieResult = document.querySelector(`
div[data-hveid] a[href*='.fandom.com/'][href*='/wiki/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.wiki.fextralife.com/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.neoseeker.com/wiki/']:first-of-type:not([role='button']):not([target='_self'])`);
if (!resultsFirstChild || !firstNonIndieResult) return;
searchResults.some((result, i) => {
if (result.matches(`
div[data-hveid] a[href*='.fandom.com/'][href*='/wiki/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.wiki.fextralife.com/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.neoseeker.com/wiki/']:first-of-type:not([role='button']):not([target='_self'])`)) {
searchResults.splice(0, i + 1);
return true;
}
return false;
});
let crossLanguageSetting = storage.crossLanguage || 'off'; let crossLanguageSetting = storage.crossLanguage || 'off';
let resultsToSort = []; let resultsToSort = [];
@ -585,7 +596,7 @@ async function reorderSearchResults(searchResults, searchEngine, storage) {
for (const searchResult of resultsToSort) { for (const searchResult of resultsToSort) {
try { try {
await reorderDestinationSearchResult(resultsFirstChild, searchResult); await reorderDestinationSearchResult(firstNonIndieResult, searchResult);
reorderedHrefs.push(searchResult.href); reorderedHrefs.push(searchResult.href);
} catch (e) { } catch (e) {
console.log('Indie Wiki Buddy failed to properly re-order search results with error: ' + e); console.log('Indie Wiki Buddy failed to properly re-order search results with error: ' + e);
@ -603,7 +614,7 @@ async function filterSearchResults(searchResults, searchEngine, storage, reorder
try { try {
// Check that result isn't within another result // Check that result isn't within another result
if (!searchResult.closest('.iwb-detected') || !searchResult.closest('.iwb-detected')?.querySelector('.iwb-new-link')) { if (!searchResult.closest('.iwb-detected') || !searchResult.closest('.iwb-detected')?.querySelector('.iwb-new-link')) {
searchResultLink = searchResult.href || ''; let searchResultLink = searchResult.href || '';
if (!searchResultLink) { if (!searchResultLink) {
continue; continue;
@ -612,7 +623,7 @@ async function filterSearchResults(searchResults, searchEngine, storage, reorder
if (searchEngine === 'google') { if (searchEngine === 'google') {
// Break if image result: // Break if image result:
if (searchResultLink.includes('imgurl=')) { if (searchResultLink.includes('imgurl=')) {
break; continue;
} }
// Skip if result doesn't include specific tags/attributes // Skip if result doesn't include specific tags/attributes
@ -621,8 +632,7 @@ async function filterSearchResults(searchResults, searchEngine, storage, reorder
searchResult.querySelector('h1') || searchResult.querySelector('h1') ||
searchResult.querySelector('h3') || searchResult.querySelector('h3') ||
searchResult.querySelector('cite') || searchResult.querySelector('cite') ||
searchResult.querySelector("div[role='link']"))) searchResult.querySelector("div[role='link']"))) {
{
searchResult.classList.add('iwb-detected'); searchResult.classList.add('iwb-detected');
continue; continue;
} }
@ -663,9 +673,10 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Google // Function to filter search results in Google
function filterGoogle(reorderedHrefs) { function filterGoogle(reorderedHrefs) {
let searchResults = document.querySelectorAll(` let searchResults = document.querySelectorAll(`
div[data-hveid] a[href*='.fandom.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.fandom.com/'][href*='/wiki/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.wiki.fextralife.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.wiki.fextralife.com/']:first-of-type:not([role='button']):not([target='_self']),
div[data-hveid] a[href*='.neoseeker.com/wiki/']:first-of-type:not([role='button']):not([target='_self'])`); div[data-hveid] a[href*='.neoseeker.com/wiki/']:first-of-type:not([role='button']):not([target='_self'])`);
filterSearchResults(searchResults, 'google', storage, reorderedHrefs); filterSearchResults(searchResults, 'google', storage, reorderedHrefs);
} }
@ -725,9 +736,9 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Brave // Function to filter search results in Brave
function filterBrave() { function filterBrave() {
let searchResults = Array.from(document.querySelectorAll('div.snippet[data-type="web"] a')).filter(el => let searchResults = Array.from(document.querySelectorAll('div.snippet[data-type="web"] a')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'brave', storage); filterSearchResults(searchResults, 'brave', storage);
} }
@ -737,9 +748,9 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Ecosia // Function to filter search results in Ecosia
function filterEcosia() { function filterEcosia() {
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'ecosia', storage); filterSearchResults(searchResults, 'ecosia', storage);
} }
@ -758,9 +769,9 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Startpage // Function to filter search results in Startpage
function filterStartpage() { function filterStartpage() {
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'startpage', storage); filterSearchResults(searchResults, 'startpage', storage);
} }
@ -770,9 +781,9 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Yandex // Function to filter search results in Yandex
function filterYandex() { function filterYandex() {
let searchResults = Array.from(document.querySelectorAll('li[data-cid] a.link, li[data-cid] a.Link, .serp-item a.link, .serp-item a.Link, .MMOrganicSnippet a, .viewer-snippet a')).filter(el => let searchResults = Array.from(document.querySelectorAll('li[data-cid] a.link, li[data-cid] a.Link, .serp-item a.link, .serp-item a.Link, .MMOrganicSnippet a, .viewer-snippet a')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'yandex', storage); filterSearchResults(searchResults, 'yandex', storage);
} }
@ -814,9 +825,9 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Function to filter search results in Kagi // Function to filter search results in Kagi
function filterKagi() { function filterKagi() {
let searchResults = Array.from(document.querySelectorAll('h3>a, a.__sri-url')).filter(el => let searchResults = Array.from(document.querySelectorAll('h3>a, a.__sri-url')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'kagi', storage); filterSearchResults(searchResults, 'kagi', storage);
} }
@ -826,17 +837,17 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
if (storage.customSearchEngines) { if (storage.customSearchEngines) {
function filterSearXNG() { function filterSearXNG() {
let searchResults = Array.from(document.querySelectorAll('h3>a')).filter(el => let searchResults = Array.from(document.querySelectorAll('h3>a')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'searxng', storage); filterSearchResults(searchResults, 'searxng', storage);
} }
function filterWhoogle() { function filterWhoogle() {
let searchResults = Array.from(document.querySelectorAll('div>a')).filter(el => let searchResults = Array.from(document.querySelectorAll('div>a')).filter(el =>
el.href?.includes('.fandom.com') || el.href?.includes('.fandom.com') ||
el.href?.includes('.wiki.fextralife.com') || el.href?.includes('.wiki.fextralife.com') ||
el.href?.includes('.neoseeker.com/wiki/')); el.href?.includes('.neoseeker.com/wiki/'));
filterSearchResults(searchResults, 'whoogle', storage); filterSearchResults(searchResults, 'whoogle', storage);
} }
@ -861,7 +872,7 @@ function startFiltering(searchEngine, storage, mutations = null, observer = null
// Check if user has enabled filtering for the current search engine // Check if user has enabled filtering for the current search engine
// If so, call startFiltering function to start filtering process // If so, call startFiltering function to start filtering process
function checkIfEnabled(searchEngine) { function checkIfEnabled(searchEngine) {
extensionAPI.runtime.sendMessage({action: 'getStorage'}, (storage) => { extensionAPI.runtime.sendMessage({ action: 'getStorage' }, (storage) => {
searchEngineToggles = storage.searchEngineToggles || {}; searchEngineToggles = storage.searchEngineToggles || {};
if (searchEngineToggles[searchEngine] === 'on' || !searchEngineToggles.hasOwnProperty(searchEngine)) { if (searchEngineToggles[searchEngine] === 'on' || !searchEngineToggles.hasOwnProperty(searchEngine)) {
startFiltering(searchEngine, storage); startFiltering(searchEngine, storage);