From 948e7226db24c13049586a3b930207de3a08d1ba Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Wed, 16 Sep 2020 10:43:09 +0200 Subject: [PATCH] =?UTF-8?q?Am=C3=A9lioration=20de=20l'extraction=20des=20t?= =?UTF-8?q?ermes=20de=20recherche?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ophirofox/content_scripts/europresse_search.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ophirofox/content_scripts/europresse_search.js b/ophirofox/content_scripts/europresse_search.js index 3912954..cf1776d 100644 --- a/ophirofox/content_scripts/europresse_search.js +++ b/ophirofox/content_scripts/europresse_search.js @@ -3,9 +3,10 @@ function onLoad() { const source = url.searchParams.get("ophirofox_source"); if (!source) return; const source_url = new URL(source); - const lemonde_match = source_url.pathname.match(/([^/.]+)(\.html)?$/); + const lemonde_match = source_url.pathname.match(/([^/.]+)(_\d*_\d*\.html)?$/); if (!lemonde_match) throw new Error("Could not find keywords in lemonde url"); - const search_terms = lemonde_match[1].split(/[^a-z]+/).join(" "); + const stopwords = new Set(['d', 'l', 'et']) + const search_terms = lemonde_match[1].split('-').filter(w => !stopwords.has(w)).join(" "); const keyword_field = document.getElementById("Keywords"); keyword_field.value = search_terms; keyword_field.form.submit();