Basic integration of VQA into display functionality

This commit is contained in:
Dmitrii Kapitan 2025-09-27 16:42:05 +02:00, committed by DimasfromLavoisier
parent 5c7e2c3f64
commit 402a379f9c
2 changed files with 192 additions and 117 deletions

View file

@@ -1,12 +1,14 @@
import ammico.faces as faces
import ammico.text as text
import ammico.colors as colors
import ammico.image_summary as image_summary
from ammico.model import MultimodalSummaryModel
import pandas as pd
from dash import html, Input, Output, dcc, State, Dash
from PIL import Image
import dash_bootstrap_components as dbc
import warnings
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
COLOR_SCHEMES = [
@@ -97,7 +99,6 @@ class AnalysisExplorer:
State("left_select_id", "value"),
State("Dropdown_select_Detector", "value"),
State("Dropdown_analysis_type", "value"),
State("checkbox_enable_image_tasks", "value"),
State("textarea_questions", "value"),
State("setting_privacy_env_var", "value"),
State("setting_Emotion_emotion_threshold", "value"),
@@ -112,9 +113,15 @@ class AnalysisExplorer:
Output("settings_TextDetector", "style"),
Output("settings_EmotionDetector", "style"),
Output("settings_ColorDetector", "style"),
Output("settings_VQA", "style"),
Input("Dropdown_select_Detector", "value"),
)(self._update_detector_setting)
self.app.callback(
Output("textarea_questions", "style"),
Input("Dropdown_analysis_type", "value"),
)(self._show_questions_textarea_on_demand)
# The different sections are split into subfunctions for better clarity
def _top_file_explorer(self, mydict: dict) -> html.Div:
"""Initialize the file explorer dropdown for selecting the file to be analyzed.
@@ -268,8 +275,69 @@ class AnalysisExplorer:
)
],
),
# start VQA settings
html.Div(
id="settings_VQA",
style={"display": "none"},
children=[
dbc.Card(
[
dbc.CardBody(
[
dbc.Row(
dbc.Col(
dcc.Dropdown(
id="Dropdown_analysis_type",
options=[
{"label": v, "value": v}
for v in SUMMARY_ANALYSIS_TYPE
],
value="summary_and_questions",
clearable=False,
style={
"width": "100%",
"minWidth": "240px",
"maxWidth": "520px",
},
),
),
justify="start",
),
html.Div(style={"height": "8px"}),
dbc.Row(
[
dbc.Col(
dcc.Textarea(
id="textarea_questions",
value="Are there people in the image?\nWhat is this picture about?",
placeholder="One question per line...",
style={
"width": "100%",
"minHeight": "160px",
"height": "220px",
"resize": "vertical",
"overflow": "auto",
},
rows=8,
),
width=12,
),
],
justify="start",
),
]
)
],
style={
"width": "100%",
"marginTop": "10px",
"zIndex": 2000,
},
)
],
),
],
style={"width": "100%", "display": "inline-block"},
style={"width": "100%", "display": "inline-block", "overflow": "visible"},
)
return settings_layout
@@ -289,6 +357,7 @@ class AnalysisExplorer:
"TextDetector",
"EmotionDetector",
"ColorDetector",
"VQA",
],
value="TextDetector",
id="Dropdown_select_Detector",
@@ -296,37 +365,6 @@ class AnalysisExplorer:
),
justify="start",
),
# NEW: Analysis-type selector (summary/questions/summary_and_questions)
dbc.Row(
dcc.Dropdown(
id="Dropdown_analysis_type",
options=[{"label": v, "value": v} for v in SUMMARY_ANALYSIS_TYPE],
value="summary_and_questions",
style={"width": "60%", "margin-top": "8px"},
),
justify="start",
),
# NEW: Enable image-level tasks (VQA / caption) checkbox
dbc.Row(
dcc.Checklist(
id="checkbox_enable_image_tasks",
options=[{"label": "Enable Image Tasks (Caption / VQA)", "value": "enabled"}],
value=["enabled"], # default enabled
inline=True,
style={"margin-top": "8px"},
),
justify="start",
),
# NEW: Questions textarea (newline-separated). Only used if analysis_type includes "questions".
dbc.Row(
dcc.Textarea(
id="textarea_questions",
value="Are there people in the image?\nWhat is this picture about?",
placeholder="One question per line...",
style={"width": "60%", "height": "120px", "margin-top": "8px"},
),
justify="start",
),
dbc.Row(
children=[self._create_setting_layout()],
id="div_detector_args",
@@ -402,15 +440,22 @@ class AnalysisExplorer:
}
if setting_input == "TextDetector":
return display_flex, display_none, display_none
return display_flex, display_none, display_none, display_none
if setting_input == "EmotionDetector":
return display_none, display_flex, display_none
return display_none, display_flex, display_none, display_none
if setting_input == "ColorDetector":
return display_none, display_none, display_flex
return display_none, display_none, display_flex, display_none
if setting_input == "VQA":
return display_none, display_none, display_none, display_flex
else:
return display_none, display_none, display_none
return display_none, display_none, display_none, display_none
def _parse_questions(self, text: Optional[str]) -> Optional[List[str]]:
if not text:
return None
qs = [q.strip() for q in text.splitlines() if q.strip()]
return qs if qs else None
def _right_output_analysis(
self,
@@ -418,8 +463,9 @@
all_img_options: dict,
current_img_value: str,
detector_value: str,
analysis_type_value: str,
textarea_questions_value: str,
setting_privacy_env_var: str,
checkbox_enable_image_tasks_value: List[str],
setting_emotion_emotion_threshold: int,
setting_emotion_race_threshold: int,
setting_emotion_gender_threshold: int,
@@ -439,78 +485,71 @@ class AnalysisExplorer:
"EmotionDetector": faces.EmotionDetector,
"TextDetector": text.TextDetector,
"ColorDetector": colors.ColorDetector,
"VQA": image_summary.ImageSummaryDetector,
}
# Get image ID from dropdown value, which is the filepath
if current_img_value is None:
return {}
image_id = all_img_options[current_img_value]
# copy image so previous runs don't leave their default values in the dict
image_copy = self.mydict[image_id].copy()
# detector value is the string name of the chosen detector
identify_function = identify_dict[detector_value]
identify_function = identify_dict.get(detector_value)
if identify_function is None:
detector_class = None
if detector_value == "TextDetector":
detector_class = identify_function(
image_copy,
accept_privacy=(
setting_privacy_env_var
if setting_privacy_env_var
else "PRIVACY_AMMICO"
),
)
elif detector_value == "EmotionDetector":
detector_class = identify_function(
image_copy,
emotion_threshold=setting_emotion_emotion_threshold,
race_threshold=setting_emotion_race_threshold,
gender_threshold=setting_emotion_gender_threshold,
accept_disclosure=(
setting_emotion_env_var
if setting_emotion_env_var
else "DISCLOSURE_AMMICO"
),
)
elif detector_value == "ColorDetector":
detector_class = identify_function(
image_copy,
delta_e_method=setting_color_delta_e_method,
)
else:
detector_class = identify_function(image_copy)
if detector_class is not None:
analysis_dict = detector_class.analyse_image()
else:
analysis_dict = {}
image_tasks_result: Dict[str, Any] = {}
enable_image_tasks = "enabled" in (checkbox_enable_image_tasks_value or [])
if enable_image_tasks:
# parse questions textarea: newline separated
if textarea_questions_value:
questions_list = [q.strip() for q in textarea_questions_value.splitlines() if q.strip()]
else:
questions_list = None
image_copy = self.mydict.get(image_id, {}).copy()
analysis_dict: Dict[str, Any] = {}
if detector_value == "VQA":
try:
image_tasks_result = self.analyse_image(
qwen_model = MultimodalSummaryModel(
model_id="Qwen/Qwen2.5-VL-3B-Instruct"
) # TODO: allow user to specify model
vqa_cls = identify_dict.get("VQA")
vqa_detector = vqa_cls(qwen_model, subdict={})
questions_list = self._parse_questions(textarea_questions_value)
analysis_result = vqa_detector.analyse_image(
image_copy,
analysis_type=analysis_type_value,
list_of_questions=questions_list,
is_concise_summary=True,
is_concise_answer=True,
)
analysis_dict = analysis_result or {}
except Exception as e:
warnings.warn(f"Image tasks failed: {e}")
image_tasks_result = {"image_tasks_error": str(e)}
# Initialize an empty dictionary
new_analysis_dict = {}
warnings.warn(f"VQA/Image tasks failed: {e}")
analysis_dict = {"image_tasks_error": str(e)}
else:
# detector value is the string name of the chosen detector
identify_function = identify_dict[detector_value]
if detector_value == "TextDetector":
detector_class = identify_function(
image_copy,
accept_privacy=(
setting_privacy_env_var
if setting_privacy_env_var
else "PRIVACY_AMMICO"
),
)
elif detector_value == "EmotionDetector":
detector_class = identify_function(
image_copy,
emotion_threshold=setting_emotion_emotion_threshold,
race_threshold=setting_emotion_race_threshold,
gender_threshold=setting_emotion_gender_threshold,
accept_disclosure=(
setting_emotion_env_var
if setting_emotion_env_var
else "DISCLOSURE_AMMICO"
),
)
elif detector_value == "ColorDetector":
detector_class = identify_function(
image_copy,
delta_e_method=setting_color_delta_e_method,
)
else:
detector_class = identify_function(image_copy)
analysis_dict = detector_class.analyse_image()
new_analysis_dict: Dict[str, Any] = {}
# Iterate over the items in the original dictionary
for k, v in analysis_dict.items():
@@ -524,21 +563,15 @@ class AnalysisExplorer:
# Add the new key-value pair to the new dictionary
new_analysis_dict[k] = new_value
if "caption" in image_tasks_result:
new_analysis_dict["caption"] = image_tasks_result.get("caption", "")
if "vqa" in image_tasks_result:
# vqa is expected to be a dict; convert to readable string
vqa_entries = image_tasks_result["vqa"]
if isinstance(vqa_entries, dict):
new_analysis_dict["vqa"] = "; ".join([f"{q}: {a}" for q, a in vqa_entries.items()])
else:
new_analysis_dict["vqa"] = str(vqa_entries)
for err_key in ("caption_error", "vqa_error", "image_tasks_error"):
if err_key in image_tasks_result:
new_analysis_dict[err_key] = image_tasks_result[err_key]
df = pd.DataFrame([new_analysis_dict]).set_index("filename").T
df.index.rename("filename", inplace=True)
return dbc.Table.from_dataframe(
df, striped=True, bordered=True, hover=True, index=True
)
def _show_questions_textarea_on_demand(self, analysis_type_value: str) -> dict:
if analysis_type_value in ("questions", "summary_and_questions"):
return {"display": "block", "width": "100%"}
else:
return {"display": "none"}

View file

@@ -16,7 +16,7 @@ class ImageSummaryDetector(AnalysisMethod):
def __init__(
self,
summary_model: MultimodalSummaryModel,
subdict: dict = {},
subdict: Optional[Dict[str, Any]] = None,
) -> None:
"""
Class for analysing images using the QWEN-2.5-VL model.
@@ -29,6 +29,8 @@ class ImageSummaryDetector(AnalysisMethod):
Returns:
None.
"""
if subdict is None:
subdict = {}
super().__init__(subdict)
self.summary_model = summary_model
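Replacing the subdict: dict = {} default avoids Python's mutable-default pitfall: the default dict is created once at function definition time and shared by every instance that omits the argument. A standalone sketch of the failure mode (illustrative, not ammico code):

class Holder:
    def __init__(self, subdict: dict = {}):  # one shared dict, created at def time
        self.subdict = subdict

a, b = Holder(), Holder()
a.subdict["caption"] = "leaked"
print(b.subdict)  # {'caption': 'leaked'} - b observes a's mutation

With the Optional[...] = None pattern above, each instance that omits subdict gets its own fresh dict.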
@@ -148,7 +150,50 @@ class ImageSummaryDetector(AnalysisMethod):
return analysis_type, list_of_questions, is_summary, is_questions
def analyse_images(
def analyse_image(
self,
entry: dict,
analysis_type: Union[str, AnalysisType] = AnalysisType.SUMMARY_AND_QUESTIONS,
list_of_questions: Optional[List[str]] = None,
max_questions_per_image: int = 32,
is_concise_summary: bool = True,
is_concise_answer: bool = True,
) -> Dict[str, Any]:
"""
Analyse a single image entry. Returns a dict whose keys depend on analysis_type:
- 'caption' (str) if a summary was requested
- 'vqa' (dict) if questions were requested
"""
self.subdict = entry
analysis_type, list_of_questions, is_summary, is_questions = (
self._validate_analysis_type(
analysis_type, list_of_questions, max_questions_per_image
)
)
if is_summary:
try:
caps = self.generate_caption(
entry,
num_return_sequences=1,
is_concise_summary=is_concise_summary,
)
self.subdict["caption"] = caps[0] if caps else ""
except Exception as e:
warnings.warn(f"Caption generation failed: {e}")
if is_questions:
try:
vqa_map = self.answer_questions(
list_of_questions, entry, is_concise_answer
)
self.subdict["vqa"] = vqa_map
except Exception as e:
warnings.warn(f"VQA failed: {e}")
return self.subdict
def analyse_images_from_dict(
self,
analysis_type: Union[AnalysisType, str] = AnalysisType.SUMMARY_AND_QUESTIONS,
list_of_questions: Optional[List[str]] = None,
@@ -191,9 +236,7 @@ class ImageSummaryDetector(AnalysisMethod):
)
entry["caption"] = caps[0] if caps else ""
except Exception as e:
warnings.warn(
"Caption generation failed for key %s: %s", key, e
)
warnings.warn(f"Caption generation failed: {e}")
if is_questions:
try:
@@ -202,7 +245,7 @@ class ImageSummaryDetector(AnalysisMethod):
)
entry["vqa"] = vqa_map
except Exception as e:
warnings.warn("VQA failed for key %s: %s", key, e)
warnings.warn(f"VQA failed: {e}")
self.subdict[key] = entry
return self.subdict
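For reference, a minimal sketch of calling the new single-image entry point directly, mirroring what the display callback now does (the entry dict and questions are placeholders; the constructor and analyse_image signature are taken from this diff):

from ammico.model import MultimodalSummaryModel
from ammico.image_summary import ImageSummaryDetector

model = MultimodalSummaryModel(model_id="Qwen/Qwen2.5-VL-3B-Instruct")
detector = ImageSummaryDetector(model, subdict={})
result = detector.analyse_image(
    {"filename": "data/example.jpg"},  # placeholder entry
    analysis_type="summary_and_questions",
    list_of_questions=["Are there people in the image?"],
    is_concise_summary=True,
    is_concise_answer=True,
)
print(result.get("caption"), result.get("vqa"))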
@@ -251,8 +294,7 @@ class ImageSummaryDetector(AnalysisMethod):
)
except RuntimeError as e:
warnings.warn(
"Retry without autocast failed: %s. Attempting cudnn-disabled retry.",
e,
f"Retry without autocast failed: {e}. Attempting cudnn-disabled retry."
)
cudnn_was_enabled = (
torch.backends.cudnn.is_available() and torch.backends.cudnn.enabled