зеркало из
https://github.com/M82-project/DIMA.git
synced 2025-10-29 13:06:08 +02:00
amélioration extractor (suppression du bruit)
Этот коммит содержится в:
родитель
ad32c2b63f
Коммит
769df8eb5d
@ -176,8 +176,7 @@ const DIMA_TECHNIQUES = [
|
||||
"same as",
|
||||
"cela rappelle",
|
||||
"déjà vu",
|
||||
"par ailleurs",
|
||||
"contexte",
|
||||
"dans ce contexte",
|
||||
],
|
||||
weight: 0.9,
|
||||
type: "technique",
|
||||
@ -493,11 +492,6 @@ const DIMA_TECHNIQUES = [
|
||||
"stéréotype",
|
||||
"les étrangers",
|
||||
"les immigrants",
|
||||
"all the",
|
||||
"toujours",
|
||||
"always",
|
||||
"jamais",
|
||||
"never",
|
||||
"en général",
|
||||
"in general",
|
||||
"les français",
|
||||
@ -853,7 +847,6 @@ const DIMA_TECHNIQUES = [
|
||||
"confident",
|
||||
"sûr",
|
||||
"sure",
|
||||
"certain",
|
||||
"capable",
|
||||
"expert",
|
||||
"maîtrise",
|
||||
|
||||
@ -130,8 +130,21 @@ class ContentExtractor {
|
||||
"advertisement",
|
||||
"social",
|
||||
"share",
|
||||
"cookie", "popup", "modal", "overlay", "banner", "newsletter",
|
||||
"related", "suggest", "recommend", "widget", "promo", "promotion",
|
||||
"comment", "rating", "review", "breadcrumb", "pagination", "tag",
|
||||
"metadata", "byline", "author-bio", "subscription", "paywall"
|
||||
];
|
||||
const skipIds = ["nav", "menu", "footer", "header", "sidebar", "comments","cookie-banner", "newsletter", "popup", "modal", "overlay",
|
||||
"related-articles", "advertisement", "social-sharing"];
|
||||
const skipAttributes = [
|
||||
'data-module="Advertisement"',
|
||||
'data-component="SocialShare"',
|
||||
'data-track-component="Newsletter"',
|
||||
'role="banner"',
|
||||
'role="navigation"',
|
||||
'role="complementary"'
|
||||
];
|
||||
const skipIds = ["nav", "menu", "footer", "header", "sidebar", "comments"];
|
||||
|
||||
const className = element.className?.toLowerCase() || "";
|
||||
const id = element.id?.toLowerCase() || "";
|
||||
@ -139,7 +152,10 @@ class ContentExtractor {
|
||||
return (
|
||||
skipClasses.some((skip) => className.includes(skip)) ||
|
||||
skipIds.some((skip) => id.includes(skip)) ||
|
||||
skipAttributes.some((attr) => element.getAttribute(attr.split('=')[0]) === attr.split('=')[1]?.replace(/"/g, '')) ||
|
||||
element.getAttribute("aria-hidden") === "true" ||
|
||||
element.getAttribute("role") === "banner" ||
|
||||
element.getAttribute("role") === "navigation" ||
|
||||
getComputedStyle(element).display === "none"
|
||||
);
|
||||
}
|
||||
|
||||
@ -378,6 +378,17 @@ class TechniqueAnalyzer {
|
||||
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
calculatePositionWeight(position, textLength) {
|
||||
const relativePosition = position / textLength;
|
||||
// Boost pour les éléments en début de texte (titres, accroches)
|
||||
if (relativePosition < 0.15) return 1.4;
|
||||
// Boost modéré pour le premier tiers
|
||||
if (relativePosition < 0.33) return 1.1;
|
||||
// Boost pour la fin (conclusions, appels à l'action)
|
||||
if (relativePosition > 0.85) return 1.2;
|
||||
// Poids normal pour le milieu
|
||||
return 1.0;
|
||||
}
|
||||
calculateRiskLevel(score) {
|
||||
if (score < 15) return "Faible";
|
||||
if (score < 30) return "Modéré";
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user