зеркало из
https://github.com/VIGINUM-FR/D3lta.git
synced 2025-10-28 20:54:21 +02:00
chore: remove demoji-based emojis removal
- Also remove demoji related tests and benchmarking code.
- This speeds up the unit test suite.
Этот коммит содержится в:
родитель
95a07bd5a3
Коммит
b8fada79c2
@ -3,8 +3,6 @@ from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import final
|
||||
|
||||
import demoji
|
||||
|
||||
|
||||
@dataclass
|
||||
class EmojisRemover(ABC):
|
||||
@ -96,8 +94,3 @@ class ExplicitUnicodeBlocksEmojisRemover(EmojisRemover):
|
||||
|
||||
def _remove_symbols_implementation(self, text: str) -> str:
|
||||
return self.SYMBOLS_REGEX.sub(r"", text)
|
||||
|
||||
|
||||
class DemojiEmojisRemover(EmojisRemover):
|
||||
def _remove_symbols_implementation(self, text: str) -> str:
|
||||
return demoji.replace(text)
|
||||
|
||||
50
poetry.lock
сгенерированный
50
poetry.lock
сгенерированный
@ -155,21 +155,6 @@ files = [
|
||||
]
|
||||
markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""}
|
||||
|
||||
[[package]]
|
||||
name = "demoji"
|
||||
version = "1.1.0"
|
||||
description = "Accurately remove and replace emojis in text strings"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "demoji-1.1.0-py3-none-any.whl", hash = "sha256:6d3256c909aea299e97fe984f827a2a060c2a8f8bfcbafa7ec9659967c5df50f"},
|
||||
{file = "demoji-1.1.0.tar.gz", hash = "sha256:072efaeca725e6f63ab59d83abeb55b178842538ed9256455a82ebbd055ff216"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
ujson = ["ujson"]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.3.0"
|
||||
@ -1060,18 +1045,6 @@ files = [
|
||||
{file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "py-cpuinfo"
|
||||
version = "9.0.0"
|
||||
description = "Get CPU info with pure Python"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"},
|
||||
{file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pybind11"
|
||||
version = "2.13.6"
|
||||
@ -1125,27 +1098,6 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""}
|
||||
[package.extras]
|
||||
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-benchmark"
|
||||
version = "5.1.0"
|
||||
description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "pytest-benchmark-5.1.0.tar.gz", hash = "sha256:9ea661cdc292e8231f7cd4c10b0319e56a2118e2c09d9f50e1b3d150d2aca105"},
|
||||
{file = "pytest_benchmark-5.1.0-py3-none-any.whl", hash = "sha256:922de2dfa3033c227c96da942d1878191afa135a29485fb942e85dff1c592c89"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
py-cpuinfo = "*"
|
||||
pytest = ">=8.1"
|
||||
|
||||
[package.extras]
|
||||
aspect = ["aspectlib"]
|
||||
elasticsearch = ["elasticsearch"]
|
||||
histogram = ["pygal", "pygaljs", "setuptools"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
@ -1748,4 +1700,4 @@ files = [
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "2a469cf6cd729d58a4315152a037a242fdc09dba63fe3adfe00bbb88c3f16863"
|
||||
content-hash = "63a5c842aafa7166bcfbdd716b0d51a14f2df0827ad594e0f8d8bb3d74e7df54"
|
||||
|
||||
@ -7,7 +7,6 @@ authors = ["Viginum"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
demoji = "^1.1.0"
|
||||
faiss-cpu = "1.9.0.post1"
|
||||
fasttext = "0.9.3"
|
||||
gensim = "4.3.3"
|
||||
@ -25,7 +24,6 @@ optional = true
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^8.3.5"
|
||||
pytest-benchmark = "^5.1.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools", "poetry-core"]
|
||||
|
||||
@ -6,9 +6,6 @@ from get_unicode_emojis_list import (
|
||||
EMOJI_TESTFILE_FILENAME,
|
||||
get_all_emojis_from_latest_unicode_emojis_specification_with_download,
|
||||
)
|
||||
from pytest_benchmark.fixture import (
|
||||
BenchmarkFixture,
|
||||
)
|
||||
|
||||
import d3lta.emojis_remover
|
||||
|
||||
@ -17,13 +14,6 @@ import d3lta.emojis_remover
|
||||
name="emojis_remover",
|
||||
params=[
|
||||
d3lta.emojis_remover.ExplicitUnicodeBlocksEmojisRemover,
|
||||
pytest.param(
|
||||
d3lta.emojis_remover.DemojiEmojisRemover,
|
||||
marks=pytest.mark.xfail(
|
||||
reason="`demoji`'s detection engine does not detect all emojis in the Unicode specification",
|
||||
strict=True,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def fixture_emojis_remover(
|
||||
@ -108,11 +98,8 @@ In consequence whereof, the National 🏞️ Assembly 👩🏭👨🏭 r
|
||||
def test_on_text_sample(
|
||||
emojis_remover: d3lta.emojis_remover.EmojisRemover,
|
||||
sample_text_with_emojipasta: str,
|
||||
sample_text: str,
|
||||
benchmark: BenchmarkFixture,
|
||||
sample_text: str
|
||||
):
|
||||
processed = benchmark(
|
||||
emojis_remover.remove_symbols,
|
||||
assert emojis_remover.remove_symbols(
|
||||
sample_text_with_emojipasta,
|
||||
)
|
||||
assert processed == sample_text
|
||||
) == sample_text
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user