diff --git a/d3lta/faissd3lta.py b/d3lta/faissd3lta.py
index b3f0f28..d044cac 100644
--- a/d3lta/faissd3lta.py
+++ b/d3lta/faissd3lta.py
@@ -90,16 +90,16 @@ def remove_symbols(text):
     return SYMBOL_REGEX.sub(r'', text)
 
 def preprocess_text(
-    s,
-    lower=True,
-    remove_accents=True,
-    remove_urls=True,
-    remove_mentions=True,
-    remove_emojis=True,
-    remove_hashtags_frontend=False,
-    remove_twitter_cropend=False,
-    replace_newline_characters=True,
-    remove_punctuation=False,
+    s: str | list[str] | set[str] | frozenset[str] | pd.Series,
+    lower: bool = True,
+    remove_accents: bool = True,
+    remove_urls: bool = True,
+    remove_mentions: bool = True,
+    remove_emojis: bool = True,
+    remove_hashtags_frontend: bool = False,
+    remove_twitter_cropend: bool = False,
+    replace_newline_characters: bool = True,
+    remove_punctuation: bool = False,
 ):
     """
     clean a list-like of strings, performing all the following treatments by default
diff --git a/tests/faissd3lta_test.py b/tests/faissd3lta_test.py
index 4ef4e59..99f2f1c 100644
--- a/tests/faissd3lta_test.py
+++ b/tests/faissd3lta_test.py
@@ -24,13 +24,13 @@ def examples_dataset():
     ]
 
 
-def test_compute_language(examples_dataset):
+def test_compute_language(examples_dataset: list[str]):
     df_language = pd.DataFrame(examples_dataset, columns=["text_language_detect"])
     df_language = compute_language(df_language)
     assert list(df_language["language"]) == ["fr", "fr", "fr", "en", "en", "fr"]
 
 
-def test_embedding_similarity(examples_dataset):
+def test_embedding_similarity(examples_dataset: list[str]):
     df_test = pd.DataFrame(
         examples_dataset,
         columns=["text_to_embed"],
@@ -52,7 +52,7 @@
     )
 
 
-def test_semantic_faiss(examples_dataset):
+def test_semantic_faiss(examples_dataset: list[str]):
     df = pd.DataFrame(examples_dataset, columns=["text_language_detect"])
     df = compute_language(df)
     df_emb = compute_embeddings(
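
Note: a minimal usage sketch of the newly annotated preprocess_text signature, assuming the function is importable as d3lta.faissd3lta.preprocess_text; the sample inputs are hypothetical and only exercise the parameter types and defaults declared in the diff above.

# Illustrative only -- not part of the diff.
import pandas as pd

from d3lta.faissd3lta import preprocess_text

# Per the new annotation, s may be a str, a list/set/frozenset of str, or a pd.Series.
raw = pd.Series(
    [
        "Bonjour @user, regardez https://example.com 😀",
        "An English sentence with #hashtags and    extra whitespace",
    ]
)

cleaned = preprocess_text(
    raw,
    lower=True,               # default per the signature
    remove_urls=True,         # default per the signature
    remove_mentions=True,     # default per the signature
    remove_punctuation=True,  # non-default keyword from the annotated signature
)
print(cleaned)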