From f1aeeabd18e78cfc5d05faa034c1e72aa80cc2d0 Mon Sep 17 00:00:00 2001 From: Petr Andriushchenko Date: Fri, 31 Mar 2023 13:35:04 +0200 Subject: [PATCH] fixed varible name, uncomment test, exluded it from CI, fixed error in multimodal_search --- .github/workflows/ci.yml | 2 +- misinformation/multimodal_search.py | 40 ++-- misinformation/test/test_multimodal_search.py | 208 +++++++++--------- notebooks/multimodal_search.ipynb | 22 +- 4 files changed, 141 insertions(+), 131 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4c1c8d2..a027012 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: - name: Run pytest run: | cd misinformation - python -m pytest -m "not gcv" -svv --cov=. --cov-report=xml + python -m pytest -m "not gcv mem_cons" -svv --cov=. --cov-report=xml - name: Upload coverage if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9' uses: codecov/codecov-action@v3 diff --git a/misinformation/multimodal_search.py b/misinformation/multimodal_search.py index 3212d7c..5fbdf95 100644 --- a/misinformation/multimodal_search.py +++ b/misinformation/multimodal_search.py @@ -13,8 +13,6 @@ from IPython.display import display from lavis.models import load_model_and_preprocess, load_model, BlipBase from lavis.processors import load_processor -# from memory_profiler import profile - class MultimodalSearch(AnalysisMethod): def __init__(self, subdict: dict) -> None: @@ -382,7 +380,6 @@ class MultimodalSearch(AnalysisMethod): def compute_gradcam_batch( self, - itm_model_type, model, visual_input, text_input, @@ -456,12 +453,12 @@ class MultimodalSearch(AnalysisMethod): att_map -= att_map.min() att_map /= att_map.max() cmap = plt.get_cmap("jet") - att_mapV = cmap(att_map) - att_mapV = np.delete(att_mapV, 3, 2) + att_mapv = cmap(att_map) + att_mapv = np.delete(att_mapv, 3, 2) if overlap: att_map = ( 1 * (1 - att_map**0.7).reshape(att_map.shape + (1,)) * img - + (att_map**0.7).reshape(att_map.shape + (1,)) * att_mapV + + (att_map**0.7).reshape(att_map.shape + (1,)) * att_mapv ) return att_map @@ -498,7 +495,6 @@ class MultimodalSearch(AnalysisMethod): vis_processor = load_processor("blip_image_eval").build(image_size=384) return itm_model, vis_processor - # @profile def image_text_match_reordering( self, search_query, @@ -518,6 +514,7 @@ class MultimodalSearch(AnalysisMethod): "blip_large": MultimodalSearch.upload_model_blip_large, "blip2_coco": MultimodalSearch.upload_model_blip2_coco, } + itm_model, vis_processor_itm = choose_model[itm_model_type](self) text_processor = load_processor("blip_caption") tokenizer = BlipBase.init_tokenizer() @@ -557,7 +554,6 @@ class MultimodalSearch(AnalysisMethod): if need_grad_cam: gradcam, itm_output = MultimodalSearch.compute_gradcam_batch( self, - itm_model_type, itm_model, images, queries_batch, @@ -618,20 +614,20 @@ class MultimodalSearch(AnalysisMethod): image_gradcam_with_itm[ list(search_query[index_text_query].values())[0] ] = localimage_gradcam_with_itm - del ( - itm_model, - vis_processor_itm, - text_processor, - raw_images, - images, - tokenizer, - queries_batch, - queries_tok_batch, - itm_score, - ) - if need_grad_cam: - del itm_output, gradcam, norm_img, grad_cam, avg_gradcam - torch.cuda.empty_cache() + del ( + itm_model, + vis_processor_itm, + text_processor, + raw_images, + images, + tokenizer, + queries_batch, + queries_tok_batch, + itm_score, + ) + if need_grad_cam: + del itm_output, gradcam, norm_img, grad_cam, avg_gradcam + torch.cuda.empty_cache() return itm_scores2, image_gradcam_with_itm def show_results(self, query, itm=False, image_gradcam_with_itm=False): diff --git a/misinformation/test/test_multimodal_search.py b/misinformation/test/test_multimodal_search.py index ef75364..a72934e 100644 --- a/misinformation/test/test_multimodal_search.py +++ b/misinformation/test/test_multimodal_search.py @@ -447,109 +447,111 @@ def test_parsing_images( cuda.empty_cache() -# def test_itm(): -# test_my_dict = { -# "IMG_2746": { -# "filename": "../misinformation/test/data/IMG_2746.png", -# "rank A bus": 1, -# "A bus": 0.15640679001808167, -# "rank ../misinformation/test/data/IMG_3758.png": 1, -# "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, -# }, -# "IMG_2809": { -# "filename": "../misinformation/test/data/IMG_2809.png", -# "rank A bus": 0, -# "A bus": 0.1970970332622528, -# "rank ../misinformation/test/data/IMG_3758.png": 0, -# "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, -# }, -# } -# search_query3 = [ -# {"text_input": "A bus"}, -# {"image": "../misinformation/test/data/IMG_3758.png"}, -# ] -# image_keys = ["IMG_2746", "IMG_2809"] -# sorted_list = [[1, 0], [1, 0]] -# for itm_model in ["blip_base", "blip_large"]: -# ( -# itm_scores, -# image_gradcam_with_itm, -# ) = ms.MultimodalSearch.image_text_match_reordering( -# test_my_dict, -# search_query3, -# itm_model, -# image_keys, -# sorted_list, -# batch_size=1, -# need_grad_cam=True, -# ) -# for i, itm in zip( -# range(len(dict_itm_scores_for_blib[itm_model])), -# dict_itm_scores_for_blib[itm_model], -# ): -# assert ( -# math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) -# is True -# ) -# for i, grad_cam in zip( -# range(len(dict_image_gradcam_with_itm_for_blip[itm_model])), -# dict_image_gradcam_with_itm_for_blip[itm_model], -# ): -# assert ( -# math.isclose( -# image_gradcam_with_itm["A bus"]["IMG_2809"][0][0].tolist()[i], -# grad_cam, -# rel_tol=10 * related_error, -# ) -# is True -# ) -# del itm_scores, image_gradcam_with_itm -# cuda.empty_cache() +@pytest.mark.long +def test_itm(): + test_my_dict = { + "IMG_2746": { + "filename": "../misinformation/test/data/IMG_2746.png", + "rank A bus": 1, + "A bus": 0.15640679001808167, + "rank ../misinformation/test/data/IMG_3758.png": 1, + "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, + }, + "IMG_2809": { + "filename": "../misinformation/test/data/IMG_2809.png", + "rank A bus": 0, + "A bus": 0.1970970332622528, + "rank ../misinformation/test/data/IMG_3758.png": 0, + "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, + }, + } + search_query3 = [ + {"text_input": "A bus"}, + {"image": "../misinformation/test/data/IMG_3758.png"}, + ] + image_keys = ["IMG_2746", "IMG_2809"] + sorted_list = [[1, 0], [1, 0]] + for itm_model in ["blip_base", "blip_large"]: + ( + itm_scores, + image_gradcam_with_itm, + ) = ms.MultimodalSearch.image_text_match_reordering( + test_my_dict, + search_query3, + itm_model, + image_keys, + sorted_list, + batch_size=1, + need_grad_cam=True, + ) + for i, itm in zip( + range(len(dict_itm_scores_for_blib[itm_model])), + dict_itm_scores_for_blib[itm_model], + ): + assert ( + math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) + is True + ) + for i, grad_cam in zip( + range(len(dict_image_gradcam_with_itm_for_blip[itm_model])), + dict_image_gradcam_with_itm_for_blip[itm_model], + ): + assert ( + math.isclose( + image_gradcam_with_itm["A bus"]["IMG_2809"][0][0].tolist()[i], + grad_cam, + rel_tol=10 * related_error, + ) + is True + ) + del itm_scores, image_gradcam_with_itm + cuda.empty_cache() -# def test_itm_blip2_coco(): -# test_my_dict = { -# "IMG_2746": { -# "filename": "../misinformation/test/data/IMG_2746.png", -# "rank A bus": 1, -# "A bus": 0.15640679001808167, -# "rank ../misinformation/test/data/IMG_3758.png": 1, -# "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, -# }, -# "IMG_2809": { -# "filename": "../misinformation/test/data/IMG_2809.png", -# "rank A bus": 0, -# "A bus": 0.1970970332622528, -# "rank ../misinformation/test/data/IMG_3758.png": 0, -# "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, -# }, -# } -# search_query3 = [ -# {"text_input": "A bus"}, -# {"image": "../misinformation/test/data/IMG_3758.png"}, -# ] -# image_keys = ["IMG_2746", "IMG_2809"] -# sorted_list = [[1, 0], [1, 0]] +@pytest.mark.long +def test_itm_blip2_coco(): + test_my_dict = { + "IMG_2746": { + "filename": "../misinformation/test/data/IMG_2746.png", + "rank A bus": 1, + "A bus": 0.15640679001808167, + "rank ../misinformation/test/data/IMG_3758.png": 1, + "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, + }, + "IMG_2809": { + "filename": "../misinformation/test/data/IMG_2809.png", + "rank A bus": 0, + "A bus": 0.1970970332622528, + "rank ../misinformation/test/data/IMG_3758.png": 0, + "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, + }, + } + search_query3 = [ + {"text_input": "A bus"}, + {"image": "../misinformation/test/data/IMG_3758.png"}, + ] + image_keys = ["IMG_2746", "IMG_2809"] + sorted_list = [[1, 0], [1, 0]] -# ( -# itm_scores, -# image_gradcam_with_itm, -# ) = ms.MultimodalSearch.image_text_match_reordering( -# test_my_dict, -# search_query3, -# "blip2_coco", -# image_keys, -# sorted_list, -# batch_size=1, -# need_grad_cam=False, -# ) -# for i, itm in zip( -# range(len(dict_itm_scores_for_blib["blip2_coco"])), -# dict_itm_scores_for_blib["blip2_coco"], -# ): -# assert ( -# math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) -# is True -# ) -# del itm_scores, image_gradcam_with_itm -# cuda.empty_cache() + ( + itm_scores, + image_gradcam_with_itm, + ) = ms.MultimodalSearch.image_text_match_reordering( + test_my_dict, + search_query3, + "blip2_coco", + image_keys, + sorted_list, + batch_size=1, + need_grad_cam=False, + ) + for i, itm in zip( + range(len(dict_itm_scores_for_blib["blip2_coco"])), + dict_itm_scores_for_blib["blip2_coco"], + ): + assert ( + math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) + is True + ) + del itm_scores, image_gradcam_with_itm + cuda.empty_cache() diff --git a/notebooks/multimodal_search.ipynb b/notebooks/multimodal_search.ipynb index c49096c..425f886 100644 --- a/notebooks/multimodal_search.ipynb +++ b/notebooks/multimodal_search.ipynb @@ -47,7 +47,7 @@ "outputs": [], "source": [ "images = misinformation.utils.find_files(\n", - " path=\"../data/Image_some_text/\",\n", + " path=\"../data/images/\",\n", " limit=10,\n", ")" ] @@ -64,6 +64,18 @@ "mydict = misinformation.utils.initialize_dict(images)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c66aec87-ede7-4985-912e-3ca29245ebf2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "mydict" + ] + }, { "cell_type": "markdown", "id": "987540a8-d800-4c70-a76b-7bfabaf123fa", @@ -143,7 +155,7 @@ "# ) = ms.MultimodalSearch.parsing_images(\n", "# mydict,\n", "# model_type,\n", - "# path_to_load_tensors=\"./saved_tensors/18_blip_saved_features_image.pt\",\n", + "# path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n", "# )" ] }, @@ -251,7 +263,7 @@ "source": [ "ms.MultimodalSearch.show_results(\n", " mydict,\n", - " search_query3[2],\n", + " search_query3[0],\n", ")" ] }, @@ -405,7 +417,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -419,7 +431,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.0" } }, "nbformat": 4,