fix: missing dependency, obsolete keyword, dash maintenance, demo notebook for new summary

Этот коммит содержится в:
Inga Ulusoy 2025-09-25 12:45:14 +02:00
родитель 8e3024372f
Коммит 0f6f9026cd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: BDC64F2E85CF8272
4 изменённых файла: 20 добавлений и 47 удалений

Просмотреть файл

@ -94,7 +94,6 @@ class AnalysisExplorer:
State("left_select_id", "options"),
State("left_select_id", "value"),
State("Dropdown_select_Detector", "value"),
State("setting_Text_analyse_text", "value"),
State("setting_privacy_env_var", "value"),
State("setting_Emotion_emotion_threshold", "value"),
State("setting_Emotion_race_threshold", "value"),
@ -157,14 +156,6 @@ class AnalysisExplorer:
id="settings_TextDetector",
style={"display": "none"},
children=[
dbc.Row(
dcc.Checklist(
["Analyse text"],
["Analyse text"],
id="setting_Text_analyse_text",
style={"margin-bottom": "10px"},
),
),
# row 1
dbc.Row(
dbc.Col(
@ -344,7 +335,7 @@ class AnalysisExplorer:
port (int, optional): The port number to run the server on (default: 8050).
"""
self.app.run_server(debug=True, port=port)
self.app.run(debug=True, port=port)
# Dash callbacks
def update_picture(self, img_path: str):
@ -375,16 +366,15 @@ class AnalysisExplorer:
}
if setting_input == "TextDetector":
return display_flex, display_none, display_none, display_none
return display_flex, display_none, display_none
if setting_input == "EmotionDetector":
return display_none, display_flex, display_none, display_none
return display_none, display_flex, display_none
if setting_input == "ColorDetector":
return display_none, display_none, display_flex, display_none
return display_none, display_none, display_flex
else:
return display_none, display_none, display_none, display_none
return display_none, display_none, display_none
def _right_output_analysis(
self,
@ -392,7 +382,6 @@ class AnalysisExplorer:
all_img_options: dict,
current_img_value: str,
detector_value: str,
settings_text_analyse_text: list,
setting_privacy_env_var: str,
setting_emotion_emotion_threshold: int,
setting_emotion_race_threshold: int,
@ -426,12 +415,8 @@ class AnalysisExplorer:
identify_function = identify_dict[detector_value]
if detector_value == "TextDetector":
analyse_text = (
True if settings_text_analyse_text == ["Analyse text"] else False
)
detector_class = identify_function(
image_copy,
analyse_text=analyse_text,
accept_privacy=(
setting_privacy_env_var
if setting_privacy_env_var

Просмотреть файл

@ -104,7 +104,8 @@
"import ammico\n",
"\n",
"# for displaying a progress bar\n",
"from tqdm import tqdm"
"from tqdm import tqdm\n",
"import os"
]
},
{
@ -140,7 +141,9 @@
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/content/drive/MyDrive/misinformation-data/misinformation-campaign-981aa55a3b13.json\""
"os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n",
" \"/home/inga/projects/misinformation-project/misinformation-notes/misinformation-campaign-981aa55a3b13.json\"\n",
")"
]
},
{
@ -171,6 +174,7 @@
"metadata": {},
"outputs": [],
"source": [
"data_path = \"./data-test\"\n",
"image_dict = ammico.find_files(\n",
" # path = \"/content/drive/MyDrive/misinformation-data/\",\n",
" path=str(data_path),\n",
@ -337,7 +341,7 @@
" enumerate(image_dict.keys()), total=len(image_dict)\n",
"): # loop through all images\n",
" image_dict[key] = ammico.TextDetector(\n",
" image_dict[key], analyse_text=True\n",
" image_dict[key]\n",
" ).analyse_image() # analyse image with TextDetector and update dict\n",
"\n",
" if (\n",
@ -361,23 +365,12 @@
"outputs": [],
"source": [
"# initialize the models\n",
"image_summary_detector = ammico.SummaryDetector(\n",
" subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n",
"model = ammico.MultimodalSummaryModel()\n",
"image_summary_detector = ammico.ImageSummaryDetector(\n",
" subdict=image_dict, summary_model=model\n",
")\n",
"\n",
"# run the analysis without having to re-initialize the model\n",
"for num, key in tqdm(\n",
" enumerate(image_dict.keys()), total=len(image_dict)\n",
"): # loop through all images\n",
" image_dict[key] = image_summary_detector.analyse_image(\n",
" subdict=image_dict[key], analysis_type=\"summary\"\n",
" ) # analyse image with SummaryDetector and update dict\n",
"\n",
" if (\n",
" num % dump_every == 0 | num == len(image_dict) - 1\n",
" ): # save results every dump_every to dump_file\n",
" image_df = ammico.get_dataframe(image_dict)\n",
" image_df.to_csv(dump_file)"
"image_summary_detector.analyse_images(analysis_type=\"summary\")"
]
},
{
@ -394,6 +387,7 @@
"outputs": [],
"source": [
"# initialize the models\n",
"# currently this does not work because of the way the summary detector is implemented\n",
"image_summary_detector = ammico.SummaryDetector(\n",
" subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n",
")\n",

Просмотреть файл

@ -67,7 +67,6 @@ class TextDetector(AnalysisMethod):
def __init__(
self,
subdict: dict,
analyse_text: bool = False,
skip_extraction: bool = False,
accept_privacy: str = "PRIVACY_AMMICO",
) -> None:
@ -76,8 +75,6 @@ class TextDetector(AnalysisMethod):
Args:
subdict (dict): Dictionary containing file name/path, and possibly previous
analysis results from other modules.
analyse_text (bool, optional): Decide if extracted text will be further subject
to analysis. Defaults to False.
skip_extraction (bool, optional): Decide if text will be extracted from images or
is already provided via a csv. Defaults to False.
accept_privacy (str, optional): Environment variable to accept the privacy
@ -96,17 +93,13 @@ class TextDetector(AnalysisMethod):
"Privacy disclosure not accepted - skipping text detection."
)
self.translator = Translator(raise_exception=True)
if not isinstance(analyse_text, bool):
raise ValueError("analyse_text needs to be set to true or false")
self.analyse_text = analyse_text
self.skip_extraction = skip_extraction
if not isinstance(skip_extraction, bool):
raise ValueError("skip_extraction needs to be set to true or false")
if self.skip_extraction:
print("Skipping text extraction from image.")
print("Reading text directly from provided dictionary.")
if self.analyse_text:
self._initialize_spacy()
self._initialize_spacy()
def set_keys(self) -> dict:
"""Set the default keys for text analysis.
@ -183,7 +176,7 @@ class TextDetector(AnalysisMethod):
self._truncate_text()
self.translate_text()
self.remove_linebreaks()
if self.analyse_text and self.subdict["text_english"]:
if self.subdict["text_english"]:
self._run_spacy()
return self.subdict

Просмотреть файл

@ -47,6 +47,7 @@ dependencies = [
"spacy",
"tensorflow<2.15", # instead of <=2.16.0 to make it compatible with CUDA 11.8, may change after updating CUDA version.
"tf-keras",
"torchvision",
"tqdm",
"transformers>=4.54",
"webcolors",