Improve search filtering, esp. on Google and Bing

For Google, fixed an issue with filtering on Google's country-code TLDs, and improved the selectors used to identify link results. This includes a function that identifies the closest possible result container. For Bing, fixed an issue that occurred when anchor tags don't have an href attribute.
pull/464/head
Kevin Payravi 2024-02-04 04:19:09 -06:00
parent b81c54fdc7
commit 356363d4a0
2 changed files with 149 additions and 102 deletions

View File

@ -55,11 +55,11 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => {
let matchingSites = [];
if (crossLanguageSetting === 'on') {
matchingSites = sites.filter(el => site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url));
matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url));
} else {
matchingSites = sites.filter(el =>
site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|| site.replace(/^https?:\/\//, '') === el.origin_base_url
site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|| site.replace(/.*https?:\/\//, '') === el.origin_base_url
);
}
if (matchingSites.length > 0) {
@ -80,7 +80,8 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
}
function commonFunctionGetOriginArticle(originURL, matchingSite) {
return decodeURIComponent(originURL.split(matchingSite['origin_base_url'] + matchingSite['origin_content_path'])[1] || '');
let url = new URL(originURL);
return decodeURIComponent(String(url.pathname).split(matchingSite['origin_content_path'])[1] || '');
}
function commonFunctionGetDestinationArticle(matchingSite, article) {

View File

@ -347,22 +347,64 @@ function hideSearchResults(searchResultContainer, searchEngine, site, showBanner
return 0;
}
function filterSearchResults(searchResults, searchEngine, storage) {
commonFunctionGetSiteDataByOrigin().then(async sites => {
/**
 * Counts how many `parentNode` hops separate `child` from `parent`.
 *
 * @param {?Object} child - Node to start walking up from.
 * @param {?Object} parent - Ancestor node to measure the distance to.
 * @returns {number} 0 when both arguments are the same node, the number of
 *   hops when `parent` is reached, or `Infinity` when the ancestor chain runs
 *   out before `parent` is found (including when `child` itself is missing).
 */
function getDistance(child, parent) {
  let hops = 0;
  // Walk with a local cursor so the caller-visible argument is never reassigned.
  let node = child;
  while (node !== parent) {
    if (!node) {
      // Ran off the top of the tree: `parent` is not an ancestor of `child`.
      // Reporting a finite count here would make unrelated nodes look "close".
      return Infinity;
    }
    node = node.parentNode;
    hops++;
  }
  return hops;
}
/**
 * Picks, from a list of candidate containers, the one that contains `target`
 * with the fewest ancestor hops between them.
 *
 * @param {Object} target - Node whose nearest containing candidate is wanted.
 * @param {Array<?Object>} elements - Candidate containers; nullish entries and
 *   entries that do not contain `target` are ignored.
 * @returns {?Object} The nearest containing candidate (the earliest one on a
 *   tie), or `null` when no candidate contains `target`.
 */
function findClosestElement(target, elements) {
  let nearest = null;
  let fewestHops = Infinity;

  elements.forEach((candidate) => {
    // Nullish candidates and non-ancestors can never be the nearest container.
    if (!candidate?.contains(target)) {
      return;
    }

    const hops = getDistance(target, candidate);
    // Strict comparison keeps the first candidate when two are equally near.
    if (hops < fewestHops) {
      fewestHops = hops;
      nearest = candidate;
    }
  });

  return nearest;
}
async function filterSearchResults(searchResults, searchEngine, storage) {
let countFiltered = 0;
for (let searchResult of searchResults) {
try {
// Check that result isn't within another result
if (!searchResult.closest('.iwb-detected')) {
let searchResultLink = '';
searchResultLink = searchResult.closest('a[href]').href;
searchResultLink = searchResult.href || '';
if (!searchResultLink) {
continue;
}
if (searchEngine === 'google') {
// Break if image result:
if (searchResultLink.includes('imgurl=')) {
break;
}
// Skip if result doesn't include specific tags/attributes
// This helps avoid capturing unintended image results
if (!(
searchResult.querySelector('h1') ||
searchResult.querySelector('h3') ||
searchResult.querySelector('cite') ||
searchResult.querySelector("div[role='link']")))
{
searchResult.classList.add('iwb-detected');
continue;
}
}
let crossLanguageSetting = storage.crossLanguage || 'off';
@ -400,9 +442,12 @@ function filterSearchResults(searchResults, searchEngine, storage) {
}
let searchResultContainer = null;
switch (searchEngine) {
case 'google':
searchResultContainer = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
const closestJsController = searchResult.closest('div[jscontroller]');
const closestDataDiv = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
searchResultContainer = findClosestElement(searchResult, [closestJsController, closestDataDiv]);
break;
case 'bing':
searchResultContainer = searchResult.closest('li.b_algo');
@ -443,7 +488,6 @@ function filterSearchResults(searchResults, searchEngine, storage) {
if (countFiltered > 0) {
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
}
});
}
function main(mutations = null, observer = null) {
@ -459,7 +503,7 @@ function main(mutations = null, observer = null) {
if (currentURL.hostname.includes('www.google.')) {
// Function to filter search results in Google
function filterGoogle() {
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='fandom.com']:first-of-type:not([role='button']):not([target]), div[data-hveid] a[href*='fextralife.com']:first-of-type:not([role='button']):not([target])");
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='.fandom.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.fextralife.com/']:first-of-type:not([role='button']):not([target='_self'])");
filterSearchResults(searchResults, 'google', storage);
}
@ -496,6 +540,7 @@ function main(mutations = null, observer = null) {
let searchResultsEncoded = document.querySelectorAll('li.b_algo a');
let searchResults = [];
searchResultsEncoded.forEach((searchResult) => {
if (searchResult.href) {
const encodedLink = new URL(searchResult.href);
if (encodedLink.href.includes('https://www.bing.com/ck/')) {
try {
@ -510,6 +555,7 @@ function main(mutations = null, observer = null) {
} else {
searchResults.push(searchResult);
}
}
});
filterSearchResults(searchResults, 'bing', storage);
@ -545,7 +591,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('ecosia.org')) {
// Function to filter search results in Ecosia
function filterEcosia() {
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'ecosia', storage);
}
@ -562,7 +608,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('startpage.com')) {
// Function to filter search results in Startpage
function filterStartpage() {
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'startpage', storage);
}
@ -579,7 +625,7 @@ function main(mutations = null, observer = null) {
} else if (currentURL.hostname.includes('yahoo.com')) {
// Function to filter search results in Yahoo
function filterYahoo() {
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
filterSearchResults(searchResults, 'yahoo', storage);
}