зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-30 21:46:04 +02:00
fix: missing dependency, obsolete keyword, dash maintenance, demo notebook for new summary
Этот коммит содержится в:
родитель
8e3024372f
Коммит
0f6f9026cd
@ -94,7 +94,6 @@ class AnalysisExplorer:
|
|||||||
State("left_select_id", "options"),
|
State("left_select_id", "options"),
|
||||||
State("left_select_id", "value"),
|
State("left_select_id", "value"),
|
||||||
State("Dropdown_select_Detector", "value"),
|
State("Dropdown_select_Detector", "value"),
|
||||||
State("setting_Text_analyse_text", "value"),
|
|
||||||
State("setting_privacy_env_var", "value"),
|
State("setting_privacy_env_var", "value"),
|
||||||
State("setting_Emotion_emotion_threshold", "value"),
|
State("setting_Emotion_emotion_threshold", "value"),
|
||||||
State("setting_Emotion_race_threshold", "value"),
|
State("setting_Emotion_race_threshold", "value"),
|
||||||
@ -157,14 +156,6 @@ class AnalysisExplorer:
|
|||||||
id="settings_TextDetector",
|
id="settings_TextDetector",
|
||||||
style={"display": "none"},
|
style={"display": "none"},
|
||||||
children=[
|
children=[
|
||||||
dbc.Row(
|
|
||||||
dcc.Checklist(
|
|
||||||
["Analyse text"],
|
|
||||||
["Analyse text"],
|
|
||||||
id="setting_Text_analyse_text",
|
|
||||||
style={"margin-bottom": "10px"},
|
|
||||||
),
|
|
||||||
),
|
|
||||||
# row 1
|
# row 1
|
||||||
dbc.Row(
|
dbc.Row(
|
||||||
dbc.Col(
|
dbc.Col(
|
||||||
@ -344,7 +335,7 @@ class AnalysisExplorer:
|
|||||||
port (int, optional): The port number to run the server on (default: 8050).
|
port (int, optional): The port number to run the server on (default: 8050).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.app.run_server(debug=True, port=port)
|
self.app.run(debug=True, port=port)
|
||||||
|
|
||||||
# Dash callbacks
|
# Dash callbacks
|
||||||
def update_picture(self, img_path: str):
|
def update_picture(self, img_path: str):
|
||||||
@ -375,16 +366,15 @@ class AnalysisExplorer:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if setting_input == "TextDetector":
|
if setting_input == "TextDetector":
|
||||||
return display_flex, display_none, display_none, display_none
|
return display_flex, display_none, display_none
|
||||||
|
|
||||||
if setting_input == "EmotionDetector":
|
if setting_input == "EmotionDetector":
|
||||||
return display_none, display_flex, display_none, display_none
|
return display_none, display_flex, display_none
|
||||||
|
|
||||||
if setting_input == "ColorDetector":
|
if setting_input == "ColorDetector":
|
||||||
return display_none, display_none, display_flex, display_none
|
return display_none, display_none, display_flex
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return display_none, display_none, display_none, display_none
|
return display_none, display_none, display_none
|
||||||
|
|
||||||
def _right_output_analysis(
|
def _right_output_analysis(
|
||||||
self,
|
self,
|
||||||
@ -392,7 +382,6 @@ class AnalysisExplorer:
|
|||||||
all_img_options: dict,
|
all_img_options: dict,
|
||||||
current_img_value: str,
|
current_img_value: str,
|
||||||
detector_value: str,
|
detector_value: str,
|
||||||
settings_text_analyse_text: list,
|
|
||||||
setting_privacy_env_var: str,
|
setting_privacy_env_var: str,
|
||||||
setting_emotion_emotion_threshold: int,
|
setting_emotion_emotion_threshold: int,
|
||||||
setting_emotion_race_threshold: int,
|
setting_emotion_race_threshold: int,
|
||||||
@ -426,12 +415,8 @@ class AnalysisExplorer:
|
|||||||
identify_function = identify_dict[detector_value]
|
identify_function = identify_dict[detector_value]
|
||||||
|
|
||||||
if detector_value == "TextDetector":
|
if detector_value == "TextDetector":
|
||||||
analyse_text = (
|
|
||||||
True if settings_text_analyse_text == ["Analyse text"] else False
|
|
||||||
)
|
|
||||||
detector_class = identify_function(
|
detector_class = identify_function(
|
||||||
image_copy,
|
image_copy,
|
||||||
analyse_text=analyse_text,
|
|
||||||
accept_privacy=(
|
accept_privacy=(
|
||||||
setting_privacy_env_var
|
setting_privacy_env_var
|
||||||
if setting_privacy_env_var
|
if setting_privacy_env_var
|
||||||
|
|||||||
@ -104,7 +104,8 @@
|
|||||||
"import ammico\n",
|
"import ammico\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# for displaying a progress bar\n",
|
"# for displaying a progress bar\n",
|
||||||
"from tqdm import tqdm"
|
"from tqdm import tqdm\n",
|
||||||
|
"import os"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -140,7 +141,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/content/drive/MyDrive/misinformation-data/misinformation-campaign-981aa55a3b13.json\""
|
"os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n",
|
||||||
|
" \"/home/inga/projects/misinformation-project/misinformation-notes/misinformation-campaign-981aa55a3b13.json\"\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -171,6 +174,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"data_path = \"./data-test\"\n",
|
||||||
"image_dict = ammico.find_files(\n",
|
"image_dict = ammico.find_files(\n",
|
||||||
" # path = \"/content/drive/MyDrive/misinformation-data/\",\n",
|
" # path = \"/content/drive/MyDrive/misinformation-data/\",\n",
|
||||||
" path=str(data_path),\n",
|
" path=str(data_path),\n",
|
||||||
@ -337,7 +341,7 @@
|
|||||||
" enumerate(image_dict.keys()), total=len(image_dict)\n",
|
" enumerate(image_dict.keys()), total=len(image_dict)\n",
|
||||||
"): # loop through all images\n",
|
"): # loop through all images\n",
|
||||||
" image_dict[key] = ammico.TextDetector(\n",
|
" image_dict[key] = ammico.TextDetector(\n",
|
||||||
" image_dict[key], analyse_text=True\n",
|
" image_dict[key]\n",
|
||||||
" ).analyse_image() # analyse image with TextDetector and update dict\n",
|
" ).analyse_image() # analyse image with TextDetector and update dict\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if (\n",
|
" if (\n",
|
||||||
@ -361,23 +365,12 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# initialize the models\n",
|
"# initialize the models\n",
|
||||||
"image_summary_detector = ammico.SummaryDetector(\n",
|
"model = ammico.MultimodalSummaryModel()\n",
|
||||||
" subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n",
|
"image_summary_detector = ammico.ImageSummaryDetector(\n",
|
||||||
|
" subdict=image_dict, summary_model=model\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# run the analysis without having to re-initialize the model\n",
|
"image_summary_detector.analyse_images(analysis_type=\"summary\")"
|
||||||
"for num, key in tqdm(\n",
|
|
||||||
" enumerate(image_dict.keys()), total=len(image_dict)\n",
|
|
||||||
"): # loop through all images\n",
|
|
||||||
" image_dict[key] = image_summary_detector.analyse_image(\n",
|
|
||||||
" subdict=image_dict[key], analysis_type=\"summary\"\n",
|
|
||||||
" ) # analyse image with SummaryDetector and update dict\n",
|
|
||||||
"\n",
|
|
||||||
" if (\n",
|
|
||||||
" num % dump_every == 0 | num == len(image_dict) - 1\n",
|
|
||||||
" ): # save results every dump_every to dump_file\n",
|
|
||||||
" image_df = ammico.get_dataframe(image_dict)\n",
|
|
||||||
" image_df.to_csv(dump_file)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -394,6 +387,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# initialize the models\n",
|
"# initialize the models\n",
|
||||||
|
"# currently this does not work because of the way the summary detector is implemented\n",
|
||||||
"image_summary_detector = ammico.SummaryDetector(\n",
|
"image_summary_detector = ammico.SummaryDetector(\n",
|
||||||
" subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n",
|
" subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
|
|||||||
@ -67,7 +67,6 @@ class TextDetector(AnalysisMethod):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
subdict: dict,
|
subdict: dict,
|
||||||
analyse_text: bool = False,
|
|
||||||
skip_extraction: bool = False,
|
skip_extraction: bool = False,
|
||||||
accept_privacy: str = "PRIVACY_AMMICO",
|
accept_privacy: str = "PRIVACY_AMMICO",
|
||||||
) -> None:
|
) -> None:
|
||||||
@ -76,8 +75,6 @@ class TextDetector(AnalysisMethod):
|
|||||||
Args:
|
Args:
|
||||||
subdict (dict): Dictionary containing file name/path, and possibly previous
|
subdict (dict): Dictionary containing file name/path, and possibly previous
|
||||||
analysis results from other modules.
|
analysis results from other modules.
|
||||||
analyse_text (bool, optional): Decide if extracted text will be further subject
|
|
||||||
to analysis. Defaults to False.
|
|
||||||
skip_extraction (bool, optional): Decide if text will be extracted from images or
|
skip_extraction (bool, optional): Decide if text will be extracted from images or
|
||||||
is already provided via a csv. Defaults to False.
|
is already provided via a csv. Defaults to False.
|
||||||
accept_privacy (str, optional): Environment variable to accept the privacy
|
accept_privacy (str, optional): Environment variable to accept the privacy
|
||||||
@ -96,17 +93,13 @@ class TextDetector(AnalysisMethod):
|
|||||||
"Privacy disclosure not accepted - skipping text detection."
|
"Privacy disclosure not accepted - skipping text detection."
|
||||||
)
|
)
|
||||||
self.translator = Translator(raise_exception=True)
|
self.translator = Translator(raise_exception=True)
|
||||||
if not isinstance(analyse_text, bool):
|
|
||||||
raise ValueError("analyse_text needs to be set to true or false")
|
|
||||||
self.analyse_text = analyse_text
|
|
||||||
self.skip_extraction = skip_extraction
|
self.skip_extraction = skip_extraction
|
||||||
if not isinstance(skip_extraction, bool):
|
if not isinstance(skip_extraction, bool):
|
||||||
raise ValueError("skip_extraction needs to be set to true or false")
|
raise ValueError("skip_extraction needs to be set to true or false")
|
||||||
if self.skip_extraction:
|
if self.skip_extraction:
|
||||||
print("Skipping text extraction from image.")
|
print("Skipping text extraction from image.")
|
||||||
print("Reading text directly from provided dictionary.")
|
print("Reading text directly from provided dictionary.")
|
||||||
if self.analyse_text:
|
self._initialize_spacy()
|
||||||
self._initialize_spacy()
|
|
||||||
|
|
||||||
def set_keys(self) -> dict:
|
def set_keys(self) -> dict:
|
||||||
"""Set the default keys for text analysis.
|
"""Set the default keys for text analysis.
|
||||||
@ -183,7 +176,7 @@ class TextDetector(AnalysisMethod):
|
|||||||
self._truncate_text()
|
self._truncate_text()
|
||||||
self.translate_text()
|
self.translate_text()
|
||||||
self.remove_linebreaks()
|
self.remove_linebreaks()
|
||||||
if self.analyse_text and self.subdict["text_english"]:
|
if self.subdict["text_english"]:
|
||||||
self._run_spacy()
|
self._run_spacy()
|
||||||
return self.subdict
|
return self.subdict
|
||||||
|
|
||||||
|
|||||||
@ -47,6 +47,7 @@ dependencies = [
|
|||||||
"spacy",
|
"spacy",
|
||||||
"tensorflow<2.15", # instead of <=2.16.0 to make it compatible with CUDA 11.8, may change after updating CUDA version.
|
"tensorflow<2.15", # instead of <=2.16.0 to make it compatible with CUDA 11.8, may change after updating CUDA version.
|
||||||
"tf-keras",
|
"tf-keras",
|
||||||
|
"torchvision",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"transformers>=4.54",
|
"transformers>=4.54",
|
||||||
"webcolors",
|
"webcolors",
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user