Improve search filtering, esp. on Google and Bing
For Google, fixed an issue with filtering on Google's country-code TLDs and improved the selectors used to identify link results. This includes a function that identifies the closest possible result container. For Bing, fixed an issue that occurred when anchor tags don't have an href attribute.
pull/464/head
parent
b81c54fdc7
commit
356363d4a0
|
@ -55,11 +55,11 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
|
|||
let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => {
|
||||
let matchingSites = [];
|
||||
if (crossLanguageSetting === 'on') {
|
||||
matchingSites = sites.filter(el => site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url));
|
||||
matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url));
|
||||
} else {
|
||||
matchingSites = sites.filter(el =>
|
||||
site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|
||||
|| site.replace(/^https?:\/\//, '') === el.origin_base_url
|
||||
site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|
||||
|| site.replace(/.*https?:\/\//, '') === el.origin_base_url
|
||||
);
|
||||
}
|
||||
if (matchingSites.length > 0) {
|
||||
|
@ -80,7 +80,8 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
|
|||
}
|
||||
|
||||
function commonFunctionGetOriginArticle(originURL, matchingSite) {
|
||||
return decodeURIComponent(originURL.split(matchingSite['origin_base_url'] + matchingSite['origin_content_path'])[1] || '');
|
||||
let url = new URL(originURL);
|
||||
return decodeURIComponent(String(url.pathname).split(matchingSite['origin_content_path'])[1] || '');
|
||||
}
|
||||
|
||||
function commonFunctionGetDestinationArticle(matchingSite, article) {
|
||||
|
|
|
@ -347,22 +347,64 @@ function hideSearchResults(searchResultContainer, searchEngine, site, showBanner
|
|||
return 0;
|
||||
}
|
||||
|
||||
function filterSearchResults(searchResults, searchEngine, storage) {
|
||||
commonFunctionGetSiteDataByOrigin().then(async sites => {
|
||||
/**
 * Count how many `parentNode` hops separate `child` from `parent`.
 *
 * @param {?{parentNode: ?object}} child - Node to start walking up from.
 * @param {?object} parent - Ancestor node to walk towards.
 * @returns {number} 0 when both arguments are the same node, the number of
 *   hops when `parent` is an ancestor of `child`, and `Infinity` when
 *   `parent` is a non-null node that cannot be reached from `child`
 *   (including when `child` itself is null/undefined).
 */
function getDistance(child, parent) {
    let distance = 0;
    let node = child;

    while (node !== parent) {
        // Walked past the top of the tree (or started from nothing) without
        // meeting `parent`: report "unreachable" rather than a misleading
        // finite depth, and avoid dereferencing a missing node.
        if (!node) {
            return Infinity;
        }

        node = node.parentNode;
        distance++;
    }

    return distance;
}
|
||||
|
||||
/**
 * Pick, from a list of candidate containers, the one nearest to `target`.
 *
 * Candidates that are null/undefined or that do not contain `target` are
 * ignored. When several candidates are equally close, the earliest one in
 * `elements` wins.
 *
 * @param {Node} target - Node whose nearest containing candidate is wanted.
 * @param {Array<?Element>} elements - Candidate containers; entries may be null.
 * @returns {?Element} The nearest containing candidate, or null if none contains `target`.
 */
function findClosestElement(target, elements) {
    let best = null;
    let bestDistance = Infinity;

    for (const candidate of elements) {
        // Skip missing candidates and ones that are not ancestors of target.
        if (!candidate?.contains(target)) {
            continue;
        }

        const hops = getDistance(target, candidate);

        // Strict comparison keeps the first candidate on ties.
        if (hops < bestDistance) {
            bestDistance = hops;
            best = candidate;
        }
    }

    return best;
}
|
||||
|
||||
async function filterSearchResults(searchResults, searchEngine, storage) {
|
||||
let countFiltered = 0;
|
||||
|
||||
for (let searchResult of searchResults) {
|
||||
try {
|
||||
// Check that result isn't within another result
|
||||
if (!searchResult.closest('.iwb-detected')) {
|
||||
let searchResultLink = '';
|
||||
searchResultLink = searchResult.closest('a[href]').href;
|
||||
searchResultLink = searchResult.href || '';
|
||||
|
||||
if (!searchResultLink) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (searchEngine === 'google') {
|
||||
// Break if image result:
|
||||
if (searchResultLink.includes('imgurl=')) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Skip if result doesn't include specific tags/attributes
|
||||
// This helps avoid capturing unintended image results
|
||||
if (!(
|
||||
searchResult.querySelector('h1') ||
|
||||
searchResult.querySelector('h3') ||
|
||||
searchResult.querySelector('cite') ||
|
||||
searchResult.querySelector("div[role='link']")))
|
||||
{
|
||||
searchResult.classList.add('iwb-detected');
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let crossLanguageSetting = storage.crossLanguage || 'off';
|
||||
|
@ -400,9 +442,12 @@ function filterSearchResults(searchResults, searchEngine, storage) {
|
|||
}
|
||||
|
||||
let searchResultContainer = null;
|
||||
|
||||
switch (searchEngine) {
|
||||
case 'google':
|
||||
searchResultContainer = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
|
||||
const closestJsController = searchResult.closest('div[jscontroller]');
|
||||
const closestDataDiv = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
|
||||
searchResultContainer = findClosestElement(searchResult, [closestJsController, closestDataDiv]);
|
||||
break;
|
||||
case 'bing':
|
||||
searchResultContainer = searchResult.closest('li.b_algo');
|
||||
|
@ -443,7 +488,6 @@ function filterSearchResults(searchResults, searchEngine, storage) {
|
|||
if (countFiltered > 0) {
|
||||
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function main(mutations = null, observer = null) {
|
||||
|
@ -459,7 +503,7 @@ function main(mutations = null, observer = null) {
|
|||
if (currentURL.hostname.includes('www.google.')) {
|
||||
// Function to filter search results in Google
|
||||
function filterGoogle() {
|
||||
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='fandom.com']:first-of-type:not([role='button']):not([target]), div[data-hveid] a[href*='fextralife.com']:first-of-type:not([role='button']):not([target])");
|
||||
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='.fandom.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.fextralife.com/']:first-of-type:not([role='button']):not([target='_self'])");
|
||||
filterSearchResults(searchResults, 'google', storage);
|
||||
}
|
||||
|
||||
|
@ -496,6 +540,7 @@ function main(mutations = null, observer = null) {
|
|||
let searchResultsEncoded = document.querySelectorAll('li.b_algo a');
|
||||
let searchResults = [];
|
||||
searchResultsEncoded.forEach((searchResult) => {
|
||||
if (searchResult.href) {
|
||||
const encodedLink = new URL(searchResult.href);
|
||||
if (encodedLink.href.includes('https://www.bing.com/ck/')) {
|
||||
try {
|
||||
|
@ -510,6 +555,7 @@ function main(mutations = null, observer = null) {
|
|||
} else {
|
||||
searchResults.push(searchResult);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
filterSearchResults(searchResults, 'bing', storage);
|
||||
|
@ -545,7 +591,7 @@ function main(mutations = null, observer = null) {
|
|||
} else if (currentURL.hostname.includes('ecosia.org')) {
|
||||
// Function to filter search results in Ecosia
|
||||
function filterEcosia() {
|
||||
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
||||
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||
filterSearchResults(searchResults, 'ecosia', storage);
|
||||
}
|
||||
|
||||
|
@ -562,7 +608,7 @@ function main(mutations = null, observer = null) {
|
|||
} else if (currentURL.hostname.includes('startpage.com')) {
|
||||
// Function to filter search results in Startpage
|
||||
function filterStartpage() {
|
||||
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
||||
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||
filterSearchResults(searchResults, 'startpage', storage);
|
||||
}
|
||||
|
||||
|
@ -579,7 +625,7 @@ function main(mutations = null, observer = null) {
|
|||
} else if (currentURL.hostname.includes('yahoo.com')) {
|
||||
// Function to filter search results in Yahoo
|
||||
function filterYahoo() {
|
||||
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
||||
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||
filterSearchResults(searchResults, 'yahoo', storage);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue