Improve search filtering, esp. on Google and Bing

For Google, fixed an issue with filtering on Google's country-code TLDs, and improved the selectors used to identify link results. This includes a new function that identifies the closest possible result container. For Bing, fixed an issue that occurred when anchor tags don't have an href attribute.
pull/464/head
Kevin Payravi 2024-02-04 04:19:09 -06:00
parent b81c54fdc7
commit 356363d4a0
2 changed files with 149 additions and 102 deletions

View File

@ -55,11 +55,11 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => { let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => {
let matchingSites = []; let matchingSites = [];
if (crossLanguageSetting === 'on') { if (crossLanguageSetting === 'on') {
matchingSites = sites.filter(el => site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url)); matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url));
} else { } else {
matchingSites = sites.filter(el => matchingSites = sites.filter(el =>
site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path) site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|| site.replace(/^https?:\/\//, '') === el.origin_base_url || site.replace(/.*https?:\/\//, '') === el.origin_base_url
); );
} }
if (matchingSites.length > 0) { if (matchingSites.length > 0) {
@ -80,7 +80,8 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
} }
function commonFunctionGetOriginArticle(originURL, matchingSite) { function commonFunctionGetOriginArticle(originURL, matchingSite) {
return decodeURIComponent(originURL.split(matchingSite['origin_base_url'] + matchingSite['origin_content_path'])[1] || ''); let url = new URL(originURL);
return decodeURIComponent(String(url.pathname).split(matchingSite['origin_content_path'])[1] || '');
} }
function commonFunctionGetDestinationArticle(matchingSite, article) { function commonFunctionGetDestinationArticle(matchingSite, article) {

View File

@ -347,103 +347,147 @@ function hideSearchResults(searchResultContainer, searchEngine, site, showBanner
return 0; return 0;
} }
function filterSearchResults(searchResults, searchEngine, storage) { function getDistance(child, parent) {
commonFunctionGetSiteDataByOrigin().then(async sites => { let distance = 0;
let countFiltered = 0;
for (let searchResult of searchResults) { while (parent !== child) {
try { child = child.parentNode;
// Check that result isn't within another result distance++;
if (!searchResult.closest('.iwb-detected')) { if (!child) break;
let searchResultLink = ''; }
searchResultLink = searchResult.closest('a[href]').href;
if (searchEngine === 'google') { return distance;
// Break if image result: }
if (searchResultLink.includes('imgurl=')) {
break; function findClosestElement(target, elements) {
} let closestElement = null;
let closestDistance = Infinity;
elements.forEach(element => {
const distance = element?.contains(target) ? getDistance(target, element) : Infinity;
if (distance < closestDistance) {
closestDistance = distance;
closestElement = element;
}
});
return closestElement;
}
async function filterSearchResults(searchResults, searchEngine, storage) {
let countFiltered = 0;
for (let searchResult of searchResults) {
try {
// Check that result isn't within another result
if (!searchResult.closest('.iwb-detected')) {
searchResultLink = searchResult.href || '';
if (!searchResultLink) {
continue;
}
if (searchEngine === 'google') {
// Break if image result:
if (searchResultLink.includes('imgurl=')) {
break;
} }
let crossLanguageSetting = storage.crossLanguage || 'off'; // Skip if result doesn't include specific tags/attributes
let matchingSite = await commonFunctionFindMatchingSite(searchResultLink, crossLanguageSetting); // This helps avoid capturing unintended image results
if (matchingSite) { if (!(
// Get user's settings for the wiki searchResult.querySelector('h1') ||
let id = matchingSite['id']; searchResult.querySelector('h3') ||
let searchFilterSetting = 'replace'; searchResult.querySelector('cite') ||
if (storage.searchEngineSettings && storage.searchEngineSettings[id]) { searchResult.querySelector("div[role='link']")))
searchFilterSetting = storage.searchEngineSettings[id]; {
} else if (storage.defaultSearchAction) { searchResult.classList.add('iwb-detected');
searchFilterSetting = storage.defaultSearchAction; continue;
}
}
let crossLanguageSetting = storage.crossLanguage || 'off';
let matchingSite = await commonFunctionFindMatchingSite(searchResultLink, crossLanguageSetting);
if (matchingSite) {
// Get user's settings for the wiki
let id = matchingSite['id'];
let searchFilterSetting = 'replace';
if (storage.searchEngineSettings && storage.searchEngineSettings[id]) {
searchFilterSetting = storage.searchEngineSettings[id];
} else if (storage.defaultSearchAction) {
searchFilterSetting = storage.defaultSearchAction;
}
if (searchFilterSetting !== 'disabled') {
// Output stylesheet if not already done
if (filteredWikis.length === 0) {
// Wait for head to be available
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
} else {
const docObserver = new MutationObserver((mutations, mutationInstance) => {
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
mutationInstance.disconnect();
}
});
docObserver.observe(document, {
childList: true,
subtree: true
});
}
} }
if (searchFilterSetting !== 'disabled') { let searchResultContainer = null;
// Output stylesheet if not already done
if (filteredWikis.length === 0) {
// Wait for head to be available
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
} else {
const docObserver = new MutationObserver((mutations, mutationInstance) => {
const headElement = document.querySelector('head');
if (headElement && !document.querySelector('.iwb-styles')) {
insertCSS();
mutationInstance.disconnect();
}
});
docObserver.observe(document, {
childList: true,
subtree: true
});
}
}
let searchResultContainer = null; switch (searchEngine) {
switch (searchEngine) { case 'google':
case 'google': const closestJsController = searchResult.closest('div[jscontroller]');
searchResultContainer = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]'); const closestDataDiv = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
break; searchResultContainer = findClosestElement(searchResult, [closestJsController, closestDataDiv]);
case 'bing': break;
searchResultContainer = searchResult.closest('li.b_algo'); case 'bing':
break; searchResultContainer = searchResult.closest('li.b_algo');
case 'duckduckgo': break;
searchResultContainer = searchResult.closest('li[data-layout], div.web-result'); case 'duckduckgo':
break; searchResultContainer = searchResult.closest('li[data-layout], div.web-result');
case 'brave': break;
searchResultContainer = searchResult.closest('div.snippet'); case 'brave':
break; searchResultContainer = searchResult.closest('div.snippet');
case 'ecosia': break;
searchResultContainer = searchResult.closest('div.mainline__result-wrapper article div.result__body'); case 'ecosia':
break; searchResultContainer = searchResult.closest('div.mainline__result-wrapper article div.result__body');
case 'startpage': break;
searchResultContainer = searchResult.closest('div.w-gl__result'); case 'startpage':
break; searchResultContainer = searchResult.closest('div.w-gl__result');
case 'yahoo': break;
searchResultContainer = searchResult.closest('#web > ol > li div.itm .exp, #web > ol > li div.algo, #web > ol > li, section.algo'); case 'yahoo':
break; searchResultContainer = searchResult.closest('#web > ol > li div.itm .exp, #web > ol > li div.algo, #web > ol > li, section.algo');
default: break;
} default:
}
if (searchResultContainer) { if (searchResultContainer) {
if (searchFilterSetting === 'hide') { if (searchFilterSetting === 'hide') {
countFiltered += hideSearchResults(searchResultContainer, searchEngine, matchingSite, storage['hiddenResultsBanner']); countFiltered += hideSearchResults(searchResultContainer, searchEngine, matchingSite, storage['hiddenResultsBanner']);
} else { } else {
countFiltered += replaceSearchResults(searchResultContainer, matchingSite, searchResultLink); countFiltered += replaceSearchResults(searchResultContainer, matchingSite, searchResultLink);
}
} }
} }
} }
} }
} catch (e) {
console.log('Indie Wiki Buddy failed to properly parse search results with error: ' + e);
} }
}; } catch (e) {
addLocationObserver(main); console.log('Indie Wiki Buddy failed to properly parse search results with error: ' + e);
if (countFiltered > 0) {
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
} }
}); };
addLocationObserver(main);
if (countFiltered > 0) {
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
}
} }
function main(mutations = null, observer = null) { function main(mutations = null, observer = null) {
@ -459,7 +503,7 @@ function main(mutations = null, observer = null) {
if (currentURL.hostname.includes('www.google.')) { if (currentURL.hostname.includes('www.google.')) {
// Function to filter search results in Google // Function to filter search results in Google
function filterGoogle() { function filterGoogle() {
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='fandom.com']:first-of-type:not([role='button']):not([target]), div[data-hveid] a[href*='fextralife.com']:first-of-type:not([role='button']):not([target])"); let searchResults = document.querySelectorAll("div[data-hveid] a[href*='.fandom.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.fextralife.com/']:first-of-type:not([role='button']):not([target='_self'])");
filterSearchResults(searchResults, 'google', storage); filterSearchResults(searchResults, 'google', storage);
} }
@ -496,19 +540,21 @@ function main(mutations = null, observer = null) {
let searchResultsEncoded = document.querySelectorAll('li.b_algo a'); let searchResultsEncoded = document.querySelectorAll('li.b_algo a');
let searchResults = []; let searchResults = [];
searchResultsEncoded.forEach((searchResult) => { searchResultsEncoded.forEach((searchResult) => {
const encodedLink = new URL(searchResult.href); if (searchResult.href) {
if (encodedLink.href.includes('https://www.bing.com/ck/')) { const encodedLink = new URL(searchResult.href);
try { if (encodedLink.href.includes('https://www.bing.com/ck/')) {
let decodedLink = base64Decode(encodedLink.searchParams.get('u').replace(/^a1/, '')); try {
if (decodedLink.includes('fandom.com') || decodedLink.includes('fextralife.com')) { let decodedLink = base64Decode(encodedLink.searchParams.get('u').replace(/^a1/, ''));
searchResult.href = decodedLink; if (decodedLink.includes('fandom.com') || decodedLink.includes('fextralife.com')) {
searchResults.push(searchResult); searchResult.href = decodedLink;
searchResults.push(searchResult);
}
} catch (e) {
console.log('Indie Wiki Buddy failed to parse Bing link with error: ', e);
} }
} catch (e) { } else {
console.log('Indie Wiki Buddy failed to parse Bing link with error: ', e); searchResults.push(searchResult);
} }
} else {
searchResults.push(searchResult);
} }
}); });
@ -545,7 +591,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('ecosia.org')) { } else if (currentURL.hostname.includes('ecosia.org')) {
// Function to filter search results in Ecosia // Function to filter search results in Ecosia
function filterEcosia() { function filterEcosia() {
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com')); let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'ecosia', storage); filterSearchResults(searchResults, 'ecosia', storage);
} }
@ -562,7 +608,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('startpage.com')) { } else if (currentURL.hostname.includes('startpage.com')) {
// Function to filter search results in Startpage // Function to filter search results in Startpage
function filterStartpage() { function filterStartpage() {
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com')); let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'startpage', storage); filterSearchResults(searchResults, 'startpage', storage);
} }
@ -579,7 +625,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('yahoo.com')) { } else if (currentURL.hostname.includes('yahoo.com')) {
// Function to filter search results in Yahoo // Function to filter search results in Yahoo
function filterYahoo() { function filterYahoo() {
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com')); let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'yahoo', storage); filterSearchResults(searchResults, 'yahoo', storage);
} }