add vqa tests

2025-10-30 05:26:05 +02:00 · 2025-10-13 13:51:24 +02:00 · 2025-10-13 13:51:24 +02:00 · d6e0fbeffe
--- a/ammico/test/test_image_summary.py
+++ b/ammico/test/test_image_summary.py
@ -0,0 +1,37 @@
 from ammico.image_summary import ImageSummaryDetector
 import pytest
@pytest.mark.long
def test_image_summary_detector(model, get_testdict):
    """Check that the "summary" analysis yields a non-empty caption per image.

    ``model`` and ``get_testdict`` are injected by pytest. The detector output
    must hold exactly two entries and contain every key of ``get_testdict``,
    each carrying a non-empty string under "caption".
    """
    summary_detector = ImageSummaryDetector(
        summary_model=model, subdict=get_testdict
    )
    summaries = summary_detector.analyse_images_from_dict(analysis_type="summary")
    assert len(summaries) == 2
    for image_id in get_testdict.keys():
        assert image_id in summaries
        entry = summaries[image_id]
        assert "caption" in entry
        caption = entry["caption"]
        assert isinstance(caption, str)
        assert len(caption) > 0
@pytest.mark.long
def test_image_summary_detector_questions(model, get_testdict):
    """Check the "questions" analysis against two known images.

    Every result entry must contain a "vqa" item. For IMG_2746 the first
    answer must contain "marathon" (case-insensitive); for IMG_2809 the second
    answer must contain "two" (case-insensitive) or the digit "2".
    """
    questions = [
        "What is happening in the image?",
        "How many cars are in the image in total?",
    ]
    question_detector = ImageSummaryDetector(
        summary_model=model, subdict=get_testdict
    )
    answers_by_image = question_detector.analyse_images_from_dict(
        analysis_type="questions", list_of_questions=questions
    )
    assert len(answers_by_image) == 2
    for image_id in get_testdict.keys():
        assert "vqa" in answers_by_image[image_id]
        answers = answers_by_image[image_id]["vqa"]
        if image_id == "IMG_2746":
            # First question: "What is happening in the image?"
            scene_answer = answers[0]
            assert "marathon" in scene_answer.lower()
        if image_id == "IMG_2809":
            # Second question: "How many cars are in the image in total?"
            count_answer = answers[1]
            assert "two" in count_answer.lower() or "2" in count_answer
--- a/ammico/test/test_model.py
+++ b/ammico/test/test_model.py
@ -1,8 +1,8 @@
 import pytest
 import torch
 from ammico.model import MultimodalSummaryModel
@pytest.mark.long
 def test_model_init(model):
    assert model.model is not None
    assert model.processor is not None
@ -10,16 +10,19 @@ def test_model_init(model):
    assert model.device is not None
@pytest.mark.long
def test_model_invalid_device():
    """Constructing the model with an unknown device string must raise ValueError."""
    bad_device = "invalid_device"
    with pytest.raises(ValueError):
        MultimodalSummaryModel(device=bad_device)
@pytest.mark.long
def test_model_invalid_model_id():
    """Constructing the model with an unknown model id on CPU must raise ValueError."""
    bad_model_id = "non_existent_model"
    with pytest.raises(ValueError):
        MultimodalSummaryModel(model_id=bad_model_id, device="cpu")
@pytest.mark.long
 def test_free_resources():
    """Build a MultimodalSummaryModel on CPU and call ``close()`` on it.

    NOTE(review): no assertion is visible after ``close()`` here, so as shown
    the test only passes if construction and ``close()`` do not raise --
    confirm whether further checks follow outside this view.
    """
    model = MultimodalSummaryModel(device="cpu")
    model.close()
--- a/pyproject.toml
+++ b/pyproject.toml
@ -39,7 +39,7 @@ dependencies = [
    "pandas",
    "Pillow",
    "pooch",
-    "qwen-vl-utils[decord]==0.0.8",
+    "qwen-vl-utils",
    "retina_face",
    "safetensors>=0.6.2",
    "setuptools",