From b8fada79c223350c65b7891d13a3fdba7d347df7 Mon Sep 17 00:00:00 2001 From: Viginum-DataScientist-6 <210390336+Viginum-DataScientist-6@users.noreply.github.com> Date: Wed, 30 Jul 2025 09:25:32 +0000 Subject: [PATCH] chore: remove demoji-based emojis removal - Also remove demoji related tests and benchmarking code. - This speeds up the unit tests suite. --- d3lta/emojis_remover.py | 7 ----- poetry.lock | 50 +----------------------------------- pyproject.toml | 2 -- tests/emojis_remover_test.py | 19 +++----------- 4 files changed, 4 insertions(+), 74 deletions(-) diff --git a/d3lta/emojis_remover.py b/d3lta/emojis_remover.py index 83740a1..a41db12 100644 --- a/d3lta/emojis_remover.py +++ b/d3lta/emojis_remover.py @@ -3,8 +3,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import final -import demoji - @dataclass class EmojisRemover(ABC): @@ -96,8 +94,3 @@ class ExplicitUnicodeBlocksEmojisRemover(EmojisRemover): def _remove_symbols_implementation(self, text: str) -> str: return self.SYMBOLS_REGEX.sub(r"", text) - - -class DemojiEmojisRemover(EmojisRemover): - def _remove_symbols_implementation(self, text: str) -> str: - return demoji.replace(text) diff --git a/poetry.lock b/poetry.lock index 6ab56b7..8a8cecb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -155,21 +155,6 @@ files = [ ] markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} -[[package]] -name = "demoji" -version = "1.1.0" -description = "Accurately remove and replace emojis in text strings" -optional = false -python-versions = ">=3.6" -groups = ["main"] -files = [ - {file = "demoji-1.1.0-py3-none-any.whl", hash = "sha256:6d3256c909aea299e97fe984f827a2a060c2a8f8bfcbafa7ec9659967c5df50f"}, - {file = "demoji-1.1.0.tar.gz", hash = "sha256:072efaeca725e6f63ab59d83abeb55b178842538ed9256455a82ebbd055ff216"}, -] - -[package.extras] -ujson = ["ujson"] - [[package]] name = "exceptiongroup" version = "1.3.0" @@ -1060,18 +1045,6 @@ files = [ {file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"}, ] -[[package]] -name = "py-cpuinfo" -version = "9.0.0" -description = "Get CPU info with pure Python" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, - {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, -] - [[package]] name = "pybind11" version = "2.13.6" @@ -1125,27 +1098,6 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] -[[package]] -name = "pytest-benchmark" -version = "5.1.0" -description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pytest-benchmark-5.1.0.tar.gz", hash = "sha256:9ea661cdc292e8231f7cd4c10b0319e56a2118e2c09d9f50e1b3d150d2aca105"}, - {file = "pytest_benchmark-5.1.0-py3-none-any.whl", hash = "sha256:922de2dfa3033c227c96da942d1878191afa135a29485fb942e85dff1c592c89"}, -] - -[package.dependencies] -py-cpuinfo = "*" -pytest = ">=8.1" - -[package.extras] -aspect = ["aspectlib"] -elasticsearch = ["elasticsearch"] -histogram = ["pygal", "pygaljs", "setuptools"] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1748,4 +1700,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "2a469cf6cd729d58a4315152a037a242fdc09dba63fe3adfe00bbb88c3f16863" +content-hash = "63a5c842aafa7166bcfbdd716b0d51a14f2df0827ad594e0f8d8bb3d74e7df54" diff --git a/pyproject.toml b/pyproject.toml index 1ab49c2..e007d86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,6 @@ authors = ["Viginum"] [tool.poetry.dependencies] python = "^3.10" -demoji = "^1.1.0" faiss-cpu = "1.9.0.post1" fasttext = "0.9.3" gensim = "4.3.3" @@ -25,7 +24,6 @@ optional = true [tool.poetry.group.dev.dependencies] pytest = "^8.3.5" -pytest-benchmark = "^5.1.0" [build-system] requires = ["setuptools", "poetry-core"] diff --git a/tests/emojis_remover_test.py b/tests/emojis_remover_test.py index b1fc9c2..ed67a7e 100644 --- a/tests/emojis_remover_test.py +++ b/tests/emojis_remover_test.py @@ -6,9 +6,6 @@ from get_unicode_emojis_list import ( EMOJI_TESTFILE_FILENAME, get_all_emojis_from_latest_unicode_emojis_specification_with_download, ) -from pytest_benchmark.fixture import ( - BenchmarkFixture, -) import d3lta.emojis_remover @@ -17,13 +14,6 @@ import d3lta.emojis_remover name="emojis_remover", params=[ d3lta.emojis_remover.ExplicitUnicodeBlocksEmojisRemover, - pytest.param( - d3lta.emojis_remover.DemojiEmojisRemover, - marks=pytest.mark.xfail( - reason="`demoji`'s detection engine does not detect all emojis in the Unicode specification", - strict=True, - ), - ), ], ) def fixture_emojis_remover( @@ -108,11 +98,8 @@ In consequence whereof, the National 🏞️ Assembly 👩‍🏭👨‍🏭 r def test_on_text_sample( emojis_remover: d3lta.emojis_remover.EmojisRemover, sample_text_with_emojipasta: str, - sample_text: str, - benchmark: BenchmarkFixture, + sample_text: str ): - processed = benchmark( - emojis_remover.remove_symbols, + assert emojis_remover.remove_symbols( sample_text_with_emojipasta, - ) - assert processed == sample_text + ) == sample_text