From 769df8eb5d4b448772a8a04332e2296009142ea6 Mon Sep 17 00:00:00 2001 From: BartM82 <105561997+BartM82@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:59:43 +0000 Subject: [PATCH] =?UTF-8?q?am=C3=A9lioration=20extractor=20(suppression=20?= =?UTF-8?q?du=20bruit)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../releases/Plugin-dima/data/techniques.js | 9 +-------- .../Plugin-dima/modules/contentExtractor.js | 18 +++++++++++++++++- .../Plugin-dima/modules/techniqueAnalyzer.js | 11 +++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/plugin/plugin_chrome/releases/Plugin-dima/data/techniques.js b/plugin/plugin_chrome/releases/Plugin-dima/data/techniques.js index b4748e6..362b744 100644 --- a/plugin/plugin_chrome/releases/Plugin-dima/data/techniques.js +++ b/plugin/plugin_chrome/releases/Plugin-dima/data/techniques.js @@ -176,8 +176,7 @@ const DIMA_TECHNIQUES = [ "same as", "cela rappelle", "déjà vu", - "par ailleurs", - "contexte", + "dans ce contexte", ], weight: 0.9, type: "technique", @@ -493,11 +492,6 @@ const DIMA_TECHNIQUES = [ "stéréotype", "les étrangers", "les immigrants", - "all the", - "toujours", - "always", - "jamais", - "never", "en général", "in general", "les français", @@ -853,7 +847,6 @@ const DIMA_TECHNIQUES = [ "confident", "sûr", "sure", - "certain", "capable", "expert", "maîtrise", diff --git a/plugin/plugin_chrome/releases/Plugin-dima/modules/contentExtractor.js b/plugin/plugin_chrome/releases/Plugin-dima/modules/contentExtractor.js index 15ea1db..c857e45 100644 --- a/plugin/plugin_chrome/releases/Plugin-dima/modules/contentExtractor.js +++ b/plugin/plugin_chrome/releases/Plugin-dima/modules/contentExtractor.js @@ -130,8 +130,21 @@ class ContentExtractor { "advertisement", "social", "share", + "cookie", "popup", "modal", "overlay", "banner", "newsletter", + "related", "suggest", "recommend", "widget", "promo", "promotion", + "comment", "rating", "review", "breadcrumb", "pagination", "tag", + "metadata", "byline", "author-bio", "subscription", "paywall" + ]; + const skipIds = ["nav", "menu", "footer", "header", "sidebar", "comments","cookie-banner", "newsletter", "popup", "modal", "overlay", + "related-articles", "advertisement", "social-sharing"]; + const skipAttributes = [ + 'data-module="Advertisement"', + 'data-component="SocialShare"', + 'data-track-component="Newsletter"', + 'role="banner"', + 'role="navigation"', + 'role="complementary"' ]; - const skipIds = ["nav", "menu", "footer", "header", "sidebar", "comments"]; const className = element.className?.toLowerCase() || ""; const id = element.id?.toLowerCase() || ""; @@ -139,7 +152,10 @@ class ContentExtractor { return ( skipClasses.some((skip) => className.includes(skip)) || skipIds.some((skip) => id.includes(skip)) || + skipAttributes.some((attr) => element.getAttribute(attr.split('=')[0]) === attr.split('=')[1]?.replace(/"/g, '')) || element.getAttribute("aria-hidden") === "true" || + element.getAttribute("role") === "banner" || + element.getAttribute("role") === "navigation" || getComputedStyle(element).display === "none" ); } diff --git a/plugin/plugin_chrome/releases/Plugin-dima/modules/techniqueAnalyzer.js b/plugin/plugin_chrome/releases/Plugin-dima/modules/techniqueAnalyzer.js index 81b369d..26d61a0 100644 --- a/plugin/plugin_chrome/releases/Plugin-dima/modules/techniqueAnalyzer.js +++ b/plugin/plugin_chrome/releases/Plugin-dima/modules/techniqueAnalyzer.js @@ -378,6 +378,17 @@ class TechniqueAnalyzer { return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } + calculatePositionWeight(position, textLength) { + const relativePosition = position / textLength; + // Boost pour les éléments en début de texte (titres, accroches) + if (relativePosition < 0.15) return 1.4; + // Boost modéré pour le premier tiers + if (relativePosition < 0.33) return 1.1; + // Boost pour la fin (conclusions, appels à l'action) + if (relativePosition > 0.85) return 1.2; + // Poids normal pour le milieu + return 1.0; + } calculateRiskLevel(score) { if (score < 15) return "Faible"; if (score < 30) return "Modéré";