Improve search filtering, esp. on Google and Bing
For Google, fixed an issue with filtering on Google's country-code TLDs, and improved the selectors used to identify link results. This includes a function that identifies the closest possible result container. For Bing, fixed an issue that occurred when anchor tags don't have an href attribute.
pull/464/head
parent
b81c54fdc7
commit
356363d4a0
|
@ -55,11 +55,11 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
|
||||||
let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => {
|
let matchingSite = commonFunctionGetSiteDataByOrigin().then(sites => {
|
||||||
let matchingSites = [];
|
let matchingSites = [];
|
||||||
if (crossLanguageSetting === 'on') {
|
if (crossLanguageSetting === 'on') {
|
||||||
matchingSites = sites.filter(el => site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url));
|
matchingSites = sites.filter(el => site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url));
|
||||||
} else {
|
} else {
|
||||||
matchingSites = sites.filter(el =>
|
matchingSites = sites.filter(el =>
|
||||||
site.replace(/^https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|
site.replace(/.*https?:\/\//, '').startsWith(el.origin_base_url + el.origin_content_path)
|
||||||
|| site.replace(/^https?:\/\//, '') === el.origin_base_url
|
|| site.replace(/.*https?:\/\//, '') === el.origin_base_url
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (matchingSites.length > 0) {
|
if (matchingSites.length > 0) {
|
||||||
|
@ -80,7 +80,8 @@ async function commonFunctionFindMatchingSite(site, crossLanguageSetting) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function commonFunctionGetOriginArticle(originURL, matchingSite) {
|
function commonFunctionGetOriginArticle(originURL, matchingSite) {
|
||||||
return decodeURIComponent(originURL.split(matchingSite['origin_base_url'] + matchingSite['origin_content_path'])[1] || '');
|
let url = new URL(originURL);
|
||||||
|
return decodeURIComponent(String(url.pathname).split(matchingSite['origin_content_path'])[1] || '');
|
||||||
}
|
}
|
||||||
|
|
||||||
function commonFunctionGetDestinationArticle(matchingSite, article) {
|
function commonFunctionGetDestinationArticle(matchingSite, article) {
|
||||||
|
|
|
@ -347,103 +347,147 @@ function hideSearchResults(searchResultContainer, searchEngine, site, showBanner
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
function filterSearchResults(searchResults, searchEngine, storage) {
|
function getDistance(child, parent) {
|
||||||
commonFunctionGetSiteDataByOrigin().then(async sites => {
|
let distance = 0;
|
||||||
let countFiltered = 0;
|
|
||||||
|
|
||||||
for (let searchResult of searchResults) {
|
while (parent !== child) {
|
||||||
try {
|
child = child.parentNode;
|
||||||
// Check that result isn't within another result
|
distance++;
|
||||||
if (!searchResult.closest('.iwb-detected')) {
|
if (!child) break;
|
||||||
let searchResultLink = '';
|
}
|
||||||
searchResultLink = searchResult.closest('a[href]').href;
|
|
||||||
|
|
||||||
if (searchEngine === 'google') {
|
return distance;
|
||||||
// Break if image result:
|
}
|
||||||
if (searchResultLink.includes('imgurl=')) {
|
|
||||||
break;
|
function findClosestElement(target, elements) {
|
||||||
}
|
let closestElement = null;
|
||||||
|
let closestDistance = Infinity;
|
||||||
|
|
||||||
|
elements.forEach(element => {
|
||||||
|
const distance = element?.contains(target) ? getDistance(target, element) : Infinity;
|
||||||
|
|
||||||
|
if (distance < closestDistance) {
|
||||||
|
closestDistance = distance;
|
||||||
|
closestElement = element;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return closestElement;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function filterSearchResults(searchResults, searchEngine, storage) {
|
||||||
|
let countFiltered = 0;
|
||||||
|
|
||||||
|
for (let searchResult of searchResults) {
|
||||||
|
try {
|
||||||
|
// Check that result isn't within another result
|
||||||
|
if (!searchResult.closest('.iwb-detected')) {
|
||||||
|
searchResultLink = searchResult.href || '';
|
||||||
|
|
||||||
|
if (!searchResultLink) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (searchEngine === 'google') {
|
||||||
|
// Break if image result:
|
||||||
|
if (searchResultLink.includes('imgurl=')) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
let crossLanguageSetting = storage.crossLanguage || 'off';
|
// Skip if result doesn't include specific tags/attributes
|
||||||
let matchingSite = await commonFunctionFindMatchingSite(searchResultLink, crossLanguageSetting);
|
// This helps avoid capturing unintended image results
|
||||||
if (matchingSite) {
|
if (!(
|
||||||
// Get user's settings for the wiki
|
searchResult.querySelector('h1') ||
|
||||||
let id = matchingSite['id'];
|
searchResult.querySelector('h3') ||
|
||||||
let searchFilterSetting = 'replace';
|
searchResult.querySelector('cite') ||
|
||||||
if (storage.searchEngineSettings && storage.searchEngineSettings[id]) {
|
searchResult.querySelector("div[role='link']")))
|
||||||
searchFilterSetting = storage.searchEngineSettings[id];
|
{
|
||||||
} else if (storage.defaultSearchAction) {
|
searchResult.classList.add('iwb-detected');
|
||||||
searchFilterSetting = storage.defaultSearchAction;
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let crossLanguageSetting = storage.crossLanguage || 'off';
|
||||||
|
let matchingSite = await commonFunctionFindMatchingSite(searchResultLink, crossLanguageSetting);
|
||||||
|
if (matchingSite) {
|
||||||
|
// Get user's settings for the wiki
|
||||||
|
let id = matchingSite['id'];
|
||||||
|
let searchFilterSetting = 'replace';
|
||||||
|
if (storage.searchEngineSettings && storage.searchEngineSettings[id]) {
|
||||||
|
searchFilterSetting = storage.searchEngineSettings[id];
|
||||||
|
} else if (storage.defaultSearchAction) {
|
||||||
|
searchFilterSetting = storage.defaultSearchAction;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (searchFilterSetting !== 'disabled') {
|
||||||
|
// Output stylesheet if not already done
|
||||||
|
if (filteredWikis.length === 0) {
|
||||||
|
// Wait for head to be available
|
||||||
|
const headElement = document.querySelector('head');
|
||||||
|
if (headElement && !document.querySelector('.iwb-styles')) {
|
||||||
|
insertCSS();
|
||||||
|
} else {
|
||||||
|
const docObserver = new MutationObserver((mutations, mutationInstance) => {
|
||||||
|
const headElement = document.querySelector('head');
|
||||||
|
if (headElement && !document.querySelector('.iwb-styles')) {
|
||||||
|
insertCSS();
|
||||||
|
mutationInstance.disconnect();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
docObserver.observe(document, {
|
||||||
|
childList: true,
|
||||||
|
subtree: true
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (searchFilterSetting !== 'disabled') {
|
let searchResultContainer = null;
|
||||||
// Output stylesheet if not already done
|
|
||||||
if (filteredWikis.length === 0) {
|
|
||||||
// Wait for head to be available
|
|
||||||
const headElement = document.querySelector('head');
|
|
||||||
if (headElement && !document.querySelector('.iwb-styles')) {
|
|
||||||
insertCSS();
|
|
||||||
} else {
|
|
||||||
const docObserver = new MutationObserver((mutations, mutationInstance) => {
|
|
||||||
const headElement = document.querySelector('head');
|
|
||||||
if (headElement && !document.querySelector('.iwb-styles')) {
|
|
||||||
insertCSS();
|
|
||||||
mutationInstance.disconnect();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
docObserver.observe(document, {
|
|
||||||
childList: true,
|
|
||||||
subtree: true
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let searchResultContainer = null;
|
switch (searchEngine) {
|
||||||
switch (searchEngine) {
|
case 'google':
|
||||||
case 'google':
|
const closestJsController = searchResult.closest('div[jscontroller]');
|
||||||
searchResultContainer = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
|
const closestDataDiv = searchResult.closest('div[data-hveid].g') || searchResult.closest('div[data-hveid]');
|
||||||
break;
|
searchResultContainer = findClosestElement(searchResult, [closestJsController, closestDataDiv]);
|
||||||
case 'bing':
|
break;
|
||||||
searchResultContainer = searchResult.closest('li.b_algo');
|
case 'bing':
|
||||||
break;
|
searchResultContainer = searchResult.closest('li.b_algo');
|
||||||
case 'duckduckgo':
|
break;
|
||||||
searchResultContainer = searchResult.closest('li[data-layout], div.web-result');
|
case 'duckduckgo':
|
||||||
break;
|
searchResultContainer = searchResult.closest('li[data-layout], div.web-result');
|
||||||
case 'brave':
|
break;
|
||||||
searchResultContainer = searchResult.closest('div.snippet');
|
case 'brave':
|
||||||
break;
|
searchResultContainer = searchResult.closest('div.snippet');
|
||||||
case 'ecosia':
|
break;
|
||||||
searchResultContainer = searchResult.closest('div.mainline__result-wrapper article div.result__body');
|
case 'ecosia':
|
||||||
break;
|
searchResultContainer = searchResult.closest('div.mainline__result-wrapper article div.result__body');
|
||||||
case 'startpage':
|
break;
|
||||||
searchResultContainer = searchResult.closest('div.w-gl__result');
|
case 'startpage':
|
||||||
break;
|
searchResultContainer = searchResult.closest('div.w-gl__result');
|
||||||
case 'yahoo':
|
break;
|
||||||
searchResultContainer = searchResult.closest('#web > ol > li div.itm .exp, #web > ol > li div.algo, #web > ol > li, section.algo');
|
case 'yahoo':
|
||||||
break;
|
searchResultContainer = searchResult.closest('#web > ol > li div.itm .exp, #web > ol > li div.algo, #web > ol > li, section.algo');
|
||||||
default:
|
break;
|
||||||
}
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
if (searchResultContainer) {
|
if (searchResultContainer) {
|
||||||
if (searchFilterSetting === 'hide') {
|
if (searchFilterSetting === 'hide') {
|
||||||
countFiltered += hideSearchResults(searchResultContainer, searchEngine, matchingSite, storage['hiddenResultsBanner']);
|
countFiltered += hideSearchResults(searchResultContainer, searchEngine, matchingSite, storage['hiddenResultsBanner']);
|
||||||
} else {
|
} else {
|
||||||
countFiltered += replaceSearchResults(searchResultContainer, matchingSite, searchResultLink);
|
countFiltered += replaceSearchResults(searchResultContainer, matchingSite, searchResultLink);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
console.log('Indie Wiki Buddy failed to properly parse search results with error: ' + e);
|
|
||||||
}
|
}
|
||||||
};
|
} catch (e) {
|
||||||
addLocationObserver(main);
|
console.log('Indie Wiki Buddy failed to properly parse search results with error: ' + e);
|
||||||
if (countFiltered > 0) {
|
|
||||||
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
|
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
addLocationObserver(main);
|
||||||
|
if (countFiltered > 0) {
|
||||||
|
chrome.storage.sync.set({ 'countSearchFilters': (storage.countSearchFilters ?? 0) + countFiltered });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function main(mutations = null, observer = null) {
|
function main(mutations = null, observer = null) {
|
||||||
|
@ -459,7 +503,7 @@ function main(mutations = null, observer = null) {
|
||||||
if (currentURL.hostname.includes('www.google.')) {
|
if (currentURL.hostname.includes('www.google.')) {
|
||||||
// Function to filter search results in Google
|
// Function to filter search results in Google
|
||||||
function filterGoogle() {
|
function filterGoogle() {
|
||||||
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='fandom.com']:first-of-type:not([role='button']):not([target]), div[data-hveid] a[href*='fextralife.com']:first-of-type:not([role='button']):not([target])");
|
let searchResults = document.querySelectorAll("div[data-hveid] a[href*='.fandom.com/']:first-of-type:not([role='button']):not([target='_self']), div[data-hveid] a[href*='.fextralife.com/']:first-of-type:not([role='button']):not([target='_self'])");
|
||||||
filterSearchResults(searchResults, 'google', storage);
|
filterSearchResults(searchResults, 'google', storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -496,19 +540,21 @@ function main(mutations = null, observer = null) {
|
||||||
let searchResultsEncoded = document.querySelectorAll('li.b_algo a');
|
let searchResultsEncoded = document.querySelectorAll('li.b_algo a');
|
||||||
let searchResults = [];
|
let searchResults = [];
|
||||||
searchResultsEncoded.forEach((searchResult) => {
|
searchResultsEncoded.forEach((searchResult) => {
|
||||||
const encodedLink = new URL(searchResult.href);
|
if (searchResult.href) {
|
||||||
if (encodedLink.href.includes('https://www.bing.com/ck/')) {
|
const encodedLink = new URL(searchResult.href);
|
||||||
try {
|
if (encodedLink.href.includes('https://www.bing.com/ck/')) {
|
||||||
let decodedLink = base64Decode(encodedLink.searchParams.get('u').replace(/^a1/, ''));
|
try {
|
||||||
if (decodedLink.includes('fandom.com') || decodedLink.includes('fextralife.com')) {
|
let decodedLink = base64Decode(encodedLink.searchParams.get('u').replace(/^a1/, ''));
|
||||||
searchResult.href = decodedLink;
|
if (decodedLink.includes('fandom.com') || decodedLink.includes('fextralife.com')) {
|
||||||
searchResults.push(searchResult);
|
searchResult.href = decodedLink;
|
||||||
|
searchResults.push(searchResult);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log('Indie Wiki Buddy failed to parse Bing link with error: ', e);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} else {
|
||||||
console.log('Indie Wiki Buddy failed to parse Bing link with error: ', e);
|
searchResults.push(searchResult);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
searchResults.push(searchResult);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -545,7 +591,7 @@ function main(mutations = null, observer = null) {
|
||||||
} else if (currentURL.hostname.includes('ecosia.org')) {
|
} else if (currentURL.hostname.includes('ecosia.org')) {
|
||||||
// Function to filter search results in Ecosia
|
// Function to filter search results in Ecosia
|
||||||
function filterEcosia() {
|
function filterEcosia() {
|
||||||
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
let searchResults = Array.from(document.querySelectorAll('section.mainline .result__title a.result__link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||||
filterSearchResults(searchResults, 'ecosia', storage);
|
filterSearchResults(searchResults, 'ecosia', storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -562,7 +608,7 @@ function main(mutations = null, observer = null) {
|
||||||
} else if (currentURL.hostname.includes('startpage.com')) {
|
} else if (currentURL.hostname.includes('startpage.com')) {
|
||||||
// Function to filter search results in Startpage
|
// Function to filter search results in Startpage
|
||||||
function filterStartpage() {
|
function filterStartpage() {
|
||||||
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
let searchResults = Array.from(document.querySelectorAll('a.result-link')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||||
filterSearchResults(searchResults, 'startpage', storage);
|
filterSearchResults(searchResults, 'startpage', storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,7 +625,7 @@ function main(mutations = null, observer = null) {
|
||||||
} else if (currentURL.hostname.includes('yahoo.com')) {
|
} else if (currentURL.hostname.includes('yahoo.com')) {
|
||||||
// Function to filter search results in Yahoo
|
// Function to filter search results in Yahoo
|
||||||
function filterYahoo() {
|
function filterYahoo() {
|
||||||
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href.includes('fandom.com') || el.href.includes('fextralife.com'));
|
let searchResults = Array.from(document.querySelectorAll('#web > ol > li a:not(.thmb), #main-algo section.algo a:not(.thmb)')).filter(el => el.href?.includes('fandom.com') || el.href?.includes('fextralife.com'));
|
||||||
filterSearchResults(searchResults, 'yahoo', storage);
|
filterSearchResults(searchResults, 'yahoo', storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue