зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-31 05:56:05 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			1828 строки
		
	
	
		
			60 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			1828 строки
		
	
	
		
			60 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "22df2297-0629-45aa-b88c-6c61f1544db6",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# Image Multimodal Search"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "9eeeb302-296e-48dc-86c7-254aa02f2b3a",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "This notebook shows how to carry out an image multimodal search with the [LAVIS](https://github.com/salesforce/LAVIS) library. \n",
 | |
|     "\n",
 | |
|     "The first cell only runs on Google Colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
 | |
|     "\n",
 | |
|     "After that, we can import `ammico` and read in the files given a folder path."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 1,
 | |
|    "id": "0b0a6bdf",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:39.974583Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:39.974111Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:39.984394Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:39.983616Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# if running on google colab\n",
 | |
|     "# flake8-noqa-cell\n",
 | |
|     "import os\n",
 | |
|     "\n",
 | |
|     "if \"google.colab\" in str(get_ipython()):\n",
 | |
|     "    # update python version\n",
 | |
|     "    # install setuptools\n",
 | |
|     "    # %pip install setuptools==61 -qqq\n",
 | |
|     "    # install ammico\n",
 | |
|     "    %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
 | |
|     "    # mount google drive for data and API key\n",
 | |
|     "    from google.colab import drive\n",
 | |
|     "\n",
 | |
|     "    drive.mount(\"/content/drive\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "id": "f10ad6c9-b1a0-4043-8c5d-ed660d77be37",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:39.988065Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:39.987571Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.092242Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.091369Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "import ammico.utils as mutils\n",
 | |
|     "import ammico.multimodal_search as ms"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "id": "8d3fe589-ff3c-4575-b8f5-650db85596bc",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.097229Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.096405Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.103333Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.102563Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "images = mutils.find_files(\n",
 | |
|     "    path=\"data/\",\n",
 | |
|     "    limit=10,\n",
 | |
|     ")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 4,
 | |
|    "id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.106982Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.106447Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.114665Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.113863Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'102141_2_eng': {'filename': 'data/102141_2_eng.png'},\n",
 | |
|        " '106349S_por': {'filename': 'data/106349S_por.png'},\n",
 | |
|        " '102730_eng': {'filename': 'data/102730_eng.png'}}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 4,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "images"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 5,
 | |
|    "id": "adf3db21-1f8b-4d44-bbef-ef0acf4623a0",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.119582Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.119086Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.122878Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.122062Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "mydict = mutils.initialize_dict(images)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 6,
 | |
|    "id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.126811Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.126211Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.131303Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.130482Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'102141_2_eng': {'filename': '102141_2_eng'},\n",
 | |
|        " '106349S_por': {'filename': '106349S_por'},\n",
 | |
|        " '102730_eng': {'filename': '102730_eng'}}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 6,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "mydict"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "987540a8-d800-4c70-a76b-7bfabaf123fa",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Indexing and extracting features from images in selected folder"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "66d6ede4-00bc-4aeb-9a36-e52d7de33fe5",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "First you need to select a model. You can choose one of the following models: \n",
 | |
|     "- [blip](https://github.com/salesforce/BLIP)\n",
 | |
|     "- [blip2](https://huggingface.co/docs/transformers/main/model_doc/blip-2) \n",
 | |
|     "- [albef](https://github.com/salesforce/ALBEF) \n",
 | |
|     "- [clip_base](https://github.com/openai/CLIP/blob/main/model-card.md)\n",
 | |
|     "- [clip_vitl14](https://github.com/mlfoundations/open_clip) \n",
 | |
|     "- [clip_vitl14_336](https://github.com/mlfoundations/open_clip)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 7,
 | |
|    "id": "7bbca1f0-d4b0-43cd-8e05-ee39d37c328e",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.135942Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.135446Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.138921Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.138149Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "model_type = \"blip\"\n",
 | |
|     "# model_type = \"blip2\"\n",
 | |
|     "# model_type = \"albef\"\n",
 | |
|     "# model_type = \"clip_base\"\n",
 | |
|     "# model_type = \"clip_vitl14\"\n",
 | |
|     "# model_type = \"clip_vitl14_336\""
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "357828c9",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "To process the loaded images using the selected model, use the code below:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 8,
 | |
|    "id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.142524Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.142129Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.145748Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.144946Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "my_obj = ms.MultimodalSearch(mydict)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 9,
 | |
|    "id": "16603ded-078e-4362-847b-57ad76829327",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.149358Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.148936Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:50:53.153966Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:50:53.153236Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'102141_2_eng': {'filename': '102141_2_eng'},\n",
 | |
|        " '106349S_por': {'filename': '106349S_por'},\n",
 | |
|        " '102730_eng': {'filename': '102730_eng'}}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 9,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "my_obj.subdict"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 10,
 | |
|    "id": "ca095404-57d0-4f5d-aeb0-38c232252b17",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:50:53.158225Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:50:53.157803Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.013974Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.012584Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 3.28MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 3.46kB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "Downloading (…)lve/main/config.json: 100%|██████████| 570/570 [00:00<00:00, 195kB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  0%|          | 0.00/1.97G [00:00<?, ?B/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  0%|          | 4.01M/1.97G [00:00<01:20, 26.3MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  1%|          | 16.0M/1.97G [00:00<00:47, 43.8MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  1%|▏         | 26.1M/1.97G [00:00<00:33, 61.6MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  2%|▏         | 43.6M/1.97G [00:00<00:21, 97.1MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  3%|▎         | 62.7M/1.97G [00:00<00:16, 127MB/s] "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  4%|▍         | 76.7M/1.97G [00:00<00:16, 120MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  4%|▍         | 89.3M/1.97G [00:01<00:21, 92.2MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  6%|▌         | 111M/1.97G [00:01<00:16, 124MB/s]  "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  6%|▋         | 130M/1.97G [00:01<00:13, 142MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  7%|▋         | 149M/1.97G [00:01<00:12, 157MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  8%|▊         | 168M/1.97G [00:01<00:11, 168MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "  9%|▉         | 192M/1.97G [00:01<00:10, 190MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 11%|█         | 215M/1.97G [00:01<00:09, 205MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 12%|█▏        | 235M/1.97G [00:02<00:18, 100MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 12%|█▏        | 251M/1.97G [00:03<00:43, 42.5MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 14%|█▎        | 274M/1.97G [00:03<00:30, 59.1MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 15%|█▍        | 297M/1.97G [00:03<00:22, 79.0MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 16%|█▌        | 319M/1.97G [00:03<00:18, 98.8MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 17%|█▋        | 338M/1.97G [00:03<00:19, 91.6MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 18%|█▊        | 359M/1.97G [00:03<00:15, 112MB/s] "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 19%|█▉        | 381M/1.97G [00:03<00:12, 134MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 20%|█▉        | 400M/1.97G [00:04<00:13, 129MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 21%|██        | 422M/1.97G [00:04<00:11, 150MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 22%|██▏       | 440M/1.97G [00:05<00:27, 59.5MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 23%|██▎       | 462M/1.97G [00:05<00:20, 78.1MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 24%|██▍       | 487M/1.97G [00:05<00:15, 102MB/s] "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 25%|██▌       | 510M/1.97G [00:05<00:12, 125MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 26%|██▋       | 530M/1.97G [00:05<00:12, 129MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 27%|██▋       | 551M/1.97G [00:05<00:10, 146MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 28%|██▊       | 573M/1.97G [00:05<00:09, 166MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 30%|██▉       | 596M/1.97G [00:05<00:08, 183MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 31%|███       | 619M/1.97G [00:05<00:07, 196MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 32%|███▏      | 640M/1.97G [00:06<00:07, 202MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 33%|███▎      | 662M/1.97G [00:06<00:06, 211MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 34%|███▍      | 684M/1.97G [00:06<00:06, 215MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 35%|███▌      | 706M/1.97G [00:06<00:06, 221MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 36%|███▌      | 728M/1.97G [00:06<00:08, 162MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 37%|███▋      | 750M/1.97G [00:06<00:07, 177MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 38%|███▊      | 769M/1.97G [00:06<00:11, 117MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 39%|███▉      | 791M/1.97G [00:07<00:09, 137MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 40%|████      | 812M/1.97G [00:07<00:08, 156MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 41%|████      | 832M/1.97G [00:07<00:08, 150MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 42%|████▏     | 852M/1.97G [00:07<00:07, 164MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 43%|████▎     | 874M/1.97G [00:07<00:06, 179MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 44%|████▍     | 893M/1.97G [00:08<00:13, 84.7MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 45%|████▌     | 915M/1.97G [00:08<00:10, 106MB/s] "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 46%|████▋     | 936M/1.97G [00:08<00:08, 126MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 48%|████▊     | 959M/1.97G [00:08<00:07, 148MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 49%|████▊     | 981M/1.97G [00:08<00:06, 166MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 50%|████▉     | 0.98G/1.97G [00:08<00:05, 179MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 51%|█████     | 1.00G/1.97G [00:08<00:05, 191MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 52%|█████▏    | 1.02G/1.97G [00:08<00:07, 138MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 53%|█████▎    | 1.04G/1.97G [00:09<00:07, 127MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 54%|█████▎    | 1.06G/1.97G [00:09<00:06, 146MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 55%|█████▍    | 1.08G/1.97G [00:09<00:05, 162MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 56%|█████▌    | 1.09G/1.97G [00:09<00:12, 77.5MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 57%|█████▋    | 1.12G/1.97G [00:09<00:09, 99.8MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 58%|█████▊    | 1.14G/1.97G [00:10<00:07, 123MB/s] "
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 59%|█████▉    | 1.16G/1.97G [00:10<00:06, 145MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 60%|█████▉    | 1.18G/1.97G [00:10<00:05, 161MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 61%|██████    | 1.20G/1.97G [00:10<00:04, 173MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 62%|██████▏   | 1.22G/1.97G [00:10<00:04, 185MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 63%|██████▎   | 1.24G/1.97G [00:10<00:05, 146MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 64%|██████▍   | 1.26G/1.97G [00:10<00:04, 165MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 65%|██████▌   | 1.28G/1.97G [00:10<00:04, 165MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 66%|██████▌   | 1.30G/1.97G [00:11<00:04, 179MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 67%|██████▋   | 1.32G/1.97G [00:11<00:03, 188MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 68%|██████▊   | 1.34G/1.97G [00:11<00:03, 196MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 69%|██████▉   | 1.36G/1.97G [00:11<00:03, 208MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 70%|███████   | 1.39G/1.97G [00:11<00:02, 217MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 71%|███████▏  | 1.41G/1.97G [00:11<00:03, 192MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 73%|███████▎  | 1.43G/1.97G [00:11<00:02, 201MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 74%|███████▎  | 1.45G/1.97G [00:11<00:02, 211MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 75%|███████▍  | 1.48G/1.97G [00:11<00:02, 222MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 76%|███████▌  | 1.50G/1.97G [00:12<00:02, 208MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 77%|███████▋  | 1.52G/1.97G [00:12<00:02, 219MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 78%|███████▊  | 1.54G/1.97G [00:12<00:02, 229MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 79%|███████▉  | 1.57G/1.97G [00:12<00:01, 234MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 81%|████████  | 1.59G/1.97G [00:12<00:01, 240MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 82%|████████▏ | 1.61G/1.97G [00:12<00:01, 242MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 83%|████████▎ | 1.64G/1.97G [00:12<00:01, 242MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 84%|████████▍ | 1.66G/1.97G [00:12<00:01, 244MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 85%|████████▌ | 1.68G/1.97G [00:12<00:01, 205MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 86%|████████▋ | 1.70G/1.97G [00:13<00:01, 186MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 87%|████████▋ | 1.72G/1.97G [00:13<00:01, 147MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 88%|████████▊ | 1.74G/1.97G [00:13<00:01, 170MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 90%|████████▉ | 1.77G/1.97G [00:13<00:01, 187MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 91%|█████████ | 1.79G/1.97G [00:13<00:00, 202MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 92%|█████████▏| 1.81G/1.97G [00:13<00:00, 214MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 93%|█████████▎| 1.83G/1.97G [00:13<00:00, 221MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 94%|█████████▍| 1.86G/1.97G [00:13<00:00, 227MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 95%|█████████▌| 1.88G/1.97G [00:14<00:00, 148MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 96%|█████████▋| 1.90G/1.97G [00:14<00:00, 165MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 98%|█████████▊| 1.92G/1.97G [00:14<00:00, 183MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       " 99%|█████████▊| 1.94G/1.97G [00:14<00:00, 169MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "100%|█████████▉| 1.96G/1.97G [00:14<00:00, 186MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\r",
 | |
|       "100%|██████████| 1.97G/1.97G [00:14<00:00, 145MB/s]"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "name": "stderr",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "ename": "FileNotFoundError",
 | |
|      "evalue": "[Errno 2] No such file or directory: '102141_2_eng'",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[10], line 8\u001b[0m\n\u001b[1;32m      1\u001b[0m (\n\u001b[1;32m      2\u001b[0m     model,\n\u001b[1;32m      3\u001b[0m     vis_processors,\n\u001b[1;32m      4\u001b[0m     txt_processors,\n\u001b[1;32m      5\u001b[0m     image_keys,\n\u001b[1;32m      6\u001b[0m     image_names,\n\u001b[1;32m      7\u001b[0m     features_image_stacked,\n\u001b[0;32m----> 8\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[43mmy_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparsing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpath_to_save_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:371\u001b[0m, in \u001b[0;36mMultimodalSearch.parsing_images\u001b[0;34m(self, model_type, path_to_save_tensors, path_to_load_tensors)\u001b[0m\n\u001b[1;32m    366\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    367\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mSyntaxError\u001b[39;00m(\n\u001b[1;32m    368\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    369\u001b[0m     )\n\u001b[0;32m--> 371\u001b[0m _, images_tensors \u001b[38;5;241m=\u001b[39m \u001b[43mMultimodalSearch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_and_process_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    372\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage_names\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvis_processors\u001b[49m\n\u001b[1;32m    373\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m path_to_load_tensors \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    375\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:168\u001b[0m, in \u001b[0;36mMultimodalSearch.read_and_process_images\u001b[0;34m(self, image_paths, vis_processor)\u001b[0m\n\u001b[1;32m    156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_and_process_images\u001b[39m(\u001b[38;5;28mself\u001b[39m, image_paths: \u001b[38;5;28mlist\u001b[39m, vis_processor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mtuple\u001b[39m:\n\u001b[1;32m    157\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    158\u001b[0m \u001b[38;5;124;03m    Read and process images with vis_processor.\u001b[39;00m\n\u001b[1;32m    159\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    166\u001b[0m \u001b[38;5;124;03m        images_tensors (torch.Tensor): tensors of images stacked in device.\u001b[39;00m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m     raw_images \u001b[38;5;241m=\u001b[39m [MultimodalSearch\u001b[38;5;241m.\u001b[39mread_img(\u001b[38;5;28mself\u001b[39m, path) \u001b[38;5;28;01mfor\u001b[39;00m path \u001b[38;5;129;01min\u001b[39;00m image_paths]\n\u001b[1;32m    169\u001b[0m     images \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    170\u001b[0m         vis_processor[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval\u001b[39m\u001b[38;5;124m\"\u001b[39m](r_img)\n\u001b[1;32m    171\u001b[0m         \u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m    172\u001b[0m         \u001b[38;5;241m.\u001b[39mto(MultimodalSearch\u001b[38;5;241m.\u001b[39mmultimodal_device)\n\u001b[1;32m    173\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m r_img \u001b[38;5;129;01min\u001b[39;00m raw_images\n\u001b[1;32m    174\u001b[0m     ]\n\u001b[1;32m    175\u001b[0m     images_tensors \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mstack(images)\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:168\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    156\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_and_process_images\u001b[39m(\u001b[38;5;28mself\u001b[39m, image_paths: \u001b[38;5;28mlist\u001b[39m, vis_processor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mtuple\u001b[39m:\n\u001b[1;32m    157\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    158\u001b[0m \u001b[38;5;124;03m    Read and process images with vis_processor.\u001b[39;00m\n\u001b[1;32m    159\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    166\u001b[0m \u001b[38;5;124;03m        images_tensors (torch.Tensor): tensors of images stacked in device.\u001b[39;00m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 168\u001b[0m     raw_images \u001b[38;5;241m=\u001b[39m [\u001b[43mMultimodalSearch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_img\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m path \u001b[38;5;129;01min\u001b[39;00m image_paths]\n\u001b[1;32m    169\u001b[0m     images \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    170\u001b[0m         vis_processor[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval\u001b[39m\u001b[38;5;124m\"\u001b[39m](r_img)\n\u001b[1;32m    171\u001b[0m         \u001b[38;5;241m.\u001b[39munsqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m    172\u001b[0m         \u001b[38;5;241m.\u001b[39mto(MultimodalSearch\u001b[38;5;241m.\u001b[39mmultimodal_device)\n\u001b[1;32m    173\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m r_img \u001b[38;5;129;01min\u001b[39;00m raw_images\n\u001b[1;32m    174\u001b[0m     ]\n\u001b[1;32m    175\u001b[0m     images_tensors 
\u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mstack(images)\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:153\u001b[0m, in \u001b[0;36mMultimodalSearch.read_img\u001b[0;34m(self, filepath)\u001b[0m\n\u001b[1;32m    143\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mread_img\u001b[39m(\u001b[38;5;28mself\u001b[39m, filepath: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Image:\n\u001b[1;32m    144\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    145\u001b[0m \u001b[38;5;124;03m    Load Image from filepath.\u001b[39;00m\n\u001b[1;32m    146\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    151\u001b[0m \u001b[38;5;124;03m        raw_image (PIL.Image): image.\u001b[39;00m\n\u001b[1;32m    152\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 153\u001b[0m     raw_image \u001b[38;5;241m=\u001b[39m \u001b[43mImage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mconvert(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRGB\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    154\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m raw_image\n",
 | |
|       "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/PIL/Image.py:3236\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(fp, mode, formats)\u001b[0m\n\u001b[1;32m   3233\u001b[0m     filename \u001b[38;5;241m=\u001b[39m fp\n\u001b[1;32m   3235\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m filename:\n\u001b[0;32m-> 3236\u001b[0m     fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3237\u001b[0m     exclusive_fp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m   3239\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
 | |
|       "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '102141_2_eng'"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "(\n",
 | |
|     "    model,\n",
 | |
|     "    vis_processors,\n",
 | |
|     "    txt_processors,\n",
 | |
|     "    image_keys,\n",
 | |
|     "    image_names,\n",
 | |
|     "    features_image_stacked,\n",
 | |
|     ") = my_obj.parsing_images(\n",
 | |
|     "    model_type, \n",
 | |
|     "    path_to_save_tensors=\"data/\",\n",
 | |
|     "    )"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 11,
 | |
|    "id": "f236c3b1-c3a6-471a-9fc5-ef831b675286",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.019545Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.018761Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.067851Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.066943Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'features_image_stacked' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfeatures_image_stacked\u001b[49m\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'features_image_stacked' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "features_image_stacked"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and the same model, so if you run this analysis once and save the tensors by giving a path with the keyword `path_to_save_tensors`, a file with the filename `<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
 | |
|     "\n",
 | |
|     "This will save you a lot of time if you want to analyse the same images with the same model but different questions. To run using the saved tensors, execute the code below, giving the path and name of the tensor file."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 12,
 | |
|    "id": "56c6d488-f093-4661-835a-5c73a329c874",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.071978Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.071413Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.076673Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.075906Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# (\n",
 | |
|     "#     model,\n",
 | |
|     "#     vis_processors,\n",
 | |
|     "#     txt_processors,\n",
 | |
|     "#     image_keys,\n",
 | |
|     "#     image_names,\n",
 | |
|     "#     features_image_stacked,\n",
 | |
|     "# ) = my_obj.parsing_images(\n",
 | |
|     "#     model_type,\n",
 | |
|     "#     path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n",
 | |
|     "# )"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "309923c1-d6f8-4424-8fca-bde5f3a98b38",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Here we have already processed our image folder with 5 images and the `clip_base` model. So you just need to pass the name of the saved file, `5_clip_base_saved_features_image.pt`, which contains the tensors of all images, as the keyword argument `path_to_load_tensors`. "
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "162a52e8-6652-4897-b92e-645cab07aaef",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Formulate your search queries\n",
 | |
|     "\n",
 | |
|     "Next, you need to form search queries. You can search either by image or by text. You can search for a single query or for several queries at once; the computational time should not be much different. The format of the queries is as follows:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 13,
 | |
|    "id": "c4196a52-d01e-42e4-8674-5712f7d6f792",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.080378Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.079877Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.089664Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.088912Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "search_query3 = [\n",
 | |
|     "    {\"text_input\": \"politician press conference\"},\n",
 | |
|     "    {\"text_input\": \"a world map\"},\n",
 | |
|     "    {\"text_input\": \"a dog\"},\n",
 | |
|     "]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "8bcf3127-3dfd-4ff4-b9e7-a043099b1418",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "You can filter your results in 3 different ways:\n",
 | |
|     "- `filter_number_of_images` limits the number of images found. That is, if the parameter `filter_number_of_images = 10`, then the first 10 images that best match the query will be shown. The ranks of the other images will be set to `None` and their similarity values to `0`.\n",
 | |
|     "- `filter_val_limit` discards images whose similarity value is below `filter_val_limit`. That is, if the parameter `filter_val_limit = 0.2`, all images with similarity less than 0.2 will be discarded.\n",
 | |
|     "- `filter_rel_error` (percentage) keeps only the images for which `100 * abs(current_similarity_value - best_similarity_value_in_current_search)/best_similarity_value_in_current_search < filter_rel_error`. That is, if we set `filter_rel_error = 30` and the top-1 image has a similarity value of 0.5, we discard all images with a similarity of less than 0.35."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 14,
 | |
|    "id": "7f7dc52f-7ee9-4590-96b7-e0d9d3b82378",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.093645Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.092995Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.141808Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.140906Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'model' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[14], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m similarity, sorted_lists \u001b[38;5;241m=\u001b[39m my_obj\u001b[38;5;241m.\u001b[39mmultimodal_search(\n\u001b[0;32m----> 2\u001b[0m     \u001b[43mmodel\u001b[49m,\n\u001b[1;32m      3\u001b[0m     vis_processors,\n\u001b[1;32m      4\u001b[0m     txt_processors,\n\u001b[1;32m      5\u001b[0m     model_type,\n\u001b[1;32m      6\u001b[0m     image_keys,\n\u001b[1;32m      7\u001b[0m     features_image_stacked,\n\u001b[1;32m      8\u001b[0m     search_query3,\n\u001b[1;32m      9\u001b[0m     filter_number_of_images\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m     10\u001b[0m )\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "similarity, sorted_lists = my_obj.multimodal_search(\n",
 | |
|     "    model,\n",
 | |
|     "    vis_processors,\n",
 | |
|     "    txt_processors,\n",
 | |
|     "    model_type,\n",
 | |
|     "    image_keys,\n",
 | |
|     "    features_image_stacked,\n",
 | |
|     "    search_query3,\n",
 | |
|     "    filter_number_of_images=20,\n",
 | |
|     ")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 15,
 | |
|    "id": "65210ca2-b674-44bd-807a-4165e14bad74",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.146031Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.145313Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.189634Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.188769Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'similarity' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msimilarity\u001b[49m\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'similarity' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "similarity"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 16,
 | |
|    "id": "557473df-e2b9-4ef0-9439-3daadf6741ac",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.193858Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.193287Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.239406Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.238605Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'sorted_lists' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43msorted_lists\u001b[49m\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'sorted_lists' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "sorted_lists"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 17,
 | |
|    "id": "c93d7e88-594d-4095-b5f2-7bf01210dc61",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.244231Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.243661Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.252026Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.250954Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'102141_2_eng': {'filename': '102141_2_eng'},\n",
 | |
|        " '106349S_por': {'filename': '106349S_por'},\n",
 | |
|        " '102730_eng': {'filename': '102730_eng'}}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 17,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "mydict"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "After launching the `multimodal_search` function, the results of each query will be added to the source dictionary.  "
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 18,
 | |
|    "id": "9ad74b21-6187-4a58-9ed8-fd3e80f5a4ed",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.258373Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.258098Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.268973Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.268013Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'filename': '106349S_por'}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 18,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "mydict[\"106349S_por\"]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "cd3ee120-8561-482b-a76a-e8f996783325",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "A special function was written to present the search results conveniently. "
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 19,
 | |
|    "id": "4324e4fd-e9aa-4933-bb12-074d54e0c510",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.272389Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.271964Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.381493Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.380564Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "'Your search query: politician press conference'"
 | |
|       ]
 | |
|      },
 | |
|      "metadata": {},
 | |
|      "output_type": "display_data"
 | |
|     },
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "'--------------------------------------------------'"
 | |
|       ]
 | |
|      },
 | |
|      "metadata": {},
 | |
|      "output_type": "display_data"
 | |
|     },
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "'Results:'"
 | |
|       ]
 | |
|      },
 | |
|      "metadata": {},
 | |
|      "output_type": "display_data"
 | |
|     },
 | |
|     {
 | |
|      "ename": "KeyError",
 | |
|      "evalue": "'politician press conference'",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmy_obj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow_results\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      2\u001b[0m \u001b[43m    \u001b[49m\u001b[43msearch_query3\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m)\u001b[49m\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:970\u001b[0m, in \u001b[0;36mMultimodalSearch.show_results\u001b[0;34m(self, query, itm, image_gradcam_with_itm)\u001b[0m\n\u001b[1;32m    967\u001b[0m     current_querry_val \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(query\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    968\u001b[0m     current_querry_rank \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrank \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(query\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m--> 970\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43msorted\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m    971\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubdict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcurrent_querry_val\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreverse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m    972\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m    973\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m s[\u001b[38;5;241m1\u001b[39m][current_querry_rank] \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    974\u001b[0m         \u001b[38;5;28;01mbreak\u001b[39;00m\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/multimodal_search.py:971\u001b[0m, in \u001b[0;36mMultimodalSearch.show_results.<locals>.<lambda>\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m    967\u001b[0m     current_querry_val \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(query\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    968\u001b[0m     current_querry_rank \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrank \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlist\u001b[39m(query\u001b[38;5;241m.\u001b[39mvalues())[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    970\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(\n\u001b[0;32m--> 971\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubdict\u001b[38;5;241m.\u001b[39mitems(), key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m t: \u001b[43mt\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcurrent_querry_val\u001b[49m\u001b[43m]\u001b[49m, reverse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    972\u001b[0m ):\n\u001b[1;32m    973\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m s[\u001b[38;5;241m1\u001b[39m][current_querry_rank] \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    974\u001b[0m         \u001b[38;5;28;01mbreak\u001b[39;00m\n",
 | |
|       "\u001b[0;31mKeyError\u001b[0m: 'politician press conference'"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "my_obj.show_results(\n",
 | |
|     "    search_query3[0],\n",
 | |
|     ")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "0b750e9f-fe64-4028-9caf-52d7187462f1",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Improve the search results\n",
 | |
|     "\n",
 | |
|     "For even better results, a slightly different approach has been prepared that can improve the search results. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images. This approach works only with text queries. You can choose one of 3 models: `\"blip_base\"`, `\"blip_large\"`, `\"blip2_coco\"`. If you get an `Out of Memory` error, try reducing the `batch_size` value (minimum = 1), which is the number of images being processed simultaneously. With the parameter `need_grad_cam = True/False` you can enable the calculation of the heat map of each image to be processed. The `image_text_match_reordering` function then calculates new similarity values and new ranks for each image. The resulting values are added to the general dictionary."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 20,
 | |
|    "id": "b3af7b39-6d0d-4da3-9b8f-7dfd3f5779be",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.385581Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.385076Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.389324Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.388428Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "itm_model = \"blip_base\"\n",
 | |
|     "# itm_model = \"blip_large\"\n",
 | |
|     "# itm_model = \"blip2_coco\""
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 21,
 | |
|    "id": "caf1f4ae-4b37-4954-800e-7120f0419de5",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.393177Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.392662Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.436892Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.435955Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'image_keys' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[21], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m itm_scores, image_gradcam_with_itm \u001b[38;5;241m=\u001b[39m my_obj\u001b[38;5;241m.\u001b[39mimage_text_match_reordering(\n\u001b[1;32m      2\u001b[0m     search_query3,\n\u001b[1;32m      3\u001b[0m     itm_model,\n\u001b[0;32m----> 4\u001b[0m     \u001b[43mimage_keys\u001b[49m,\n\u001b[1;32m      5\u001b[0m     sorted_lists,\n\u001b[1;32m      6\u001b[0m     batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m      7\u001b[0m     need_grad_cam\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m      8\u001b[0m )\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'image_keys' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n",
 | |
|     "    search_query3,\n",
 | |
|     "    itm_model,\n",
 | |
|     "    image_keys,\n",
 | |
|     "    sorted_lists,\n",
 | |
|     "    batch_size=1,\n",
 | |
|     "    need_grad_cam=True,\n",
 | |
|     ")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "9e98c150-5fab-4251-bce7-0d8fc7b385b9",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Then, using the same output function, you can add the `itm=True` argument to output the new image order. You can also add the `image_gradcam_with_itm` argument to output the heat maps of the calculated images."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 22,
 | |
|    "id": "6a829b99-5230-463a-8b11-30ffbb67fc3a",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.441402Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.440853Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.485206Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.484182Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'image_gradcam_with_itm' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[22], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m my_obj\u001b[38;5;241m.\u001b[39mshow_results(\n\u001b[0;32m----> 2\u001b[0m     search_query3[\u001b[38;5;241m0\u001b[39m], itm\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, image_gradcam_with_itm\u001b[38;5;241m=\u001b[39m\u001b[43mimage_gradcam_with_itm\u001b[49m\n\u001b[1;32m      3\u001b[0m )\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'image_gradcam_with_itm' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "my_obj.show_results(\n",
 | |
|     "    search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
 | |
|     ")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "d86ab96b-1907-4b7f-a78e-3983b516d781",
 | |
|    "metadata": {
 | |
|     "tags": []
 | |
|    },
 | |
|    "source": [
 | |
|     "## Save search results to csv"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "4bdbc4d4-695d-4751-ab7c-d2d98e2917d7",
 | |
|    "metadata": {
 | |
|     "tags": []
 | |
|    },
 | |
|    "source": [
 | |
|     "Convert the dictionary of dictionaries into a dictionary with lists:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 23,
 | |
|    "id": "6c6ddd83-bc87-48f2-a8d6-1bd3f4201ff7",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.489469Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.488734Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.495000Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.494279Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "outdict = mutils.append_data_to_dict(mydict)\n",
 | |
|     "df = mutils.dump_df(outdict)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "ea2675d5-604c-45e7-86d2-080b1f4559a0",
 | |
|    "metadata": {
 | |
|     "tags": []
 | |
|    },
 | |
|    "source": [
 | |
|     "Check the dataframe:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 24,
 | |
|    "id": "e78646d6-80be-4d3e-8123-3360957bcaa8",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.498638Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.497955Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.513508Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.512731Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/html": [
 | |
|        "<div>\n",
 | |
|        "<style scoped>\n",
 | |
|        "    .dataframe tbody tr th:only-of-type {\n",
 | |
|        "        vertical-align: middle;\n",
 | |
|        "    }\n",
 | |
|        "\n",
 | |
|        "    .dataframe tbody tr th {\n",
 | |
|        "        vertical-align: top;\n",
 | |
|        "    }\n",
 | |
|        "\n",
 | |
|        "    .dataframe thead th {\n",
 | |
|        "        text-align: right;\n",
 | |
|        "    }\n",
 | |
|        "</style>\n",
 | |
|        "<table border=\"1\" class=\"dataframe\">\n",
 | |
|        "  <thead>\n",
 | |
|        "    <tr style=\"text-align: right;\">\n",
 | |
|        "      <th></th>\n",
 | |
|        "      <th>filename</th>\n",
 | |
|        "    </tr>\n",
 | |
|        "  </thead>\n",
 | |
|        "  <tbody>\n",
 | |
|        "    <tr>\n",
 | |
|        "      <th>0</th>\n",
 | |
|        "      <td>102141_2_eng</td>\n",
 | |
|        "    </tr>\n",
 | |
|        "    <tr>\n",
 | |
|        "      <th>1</th>\n",
 | |
|        "      <td>106349S_por</td>\n",
 | |
|        "    </tr>\n",
 | |
|        "    <tr>\n",
 | |
|        "      <th>2</th>\n",
 | |
|        "      <td>102730_eng</td>\n",
 | |
|        "    </tr>\n",
 | |
|        "  </tbody>\n",
 | |
|        "</table>\n",
 | |
|        "</div>"
 | |
|       ],
 | |
|       "text/plain": [
 | |
|        "       filename\n",
 | |
|        "0  102141_2_eng\n",
 | |
|        "1   106349S_por\n",
 | |
|        "2    102730_eng"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 24,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "df.head(10)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "attachments": {},
 | |
|    "cell_type": "markdown",
 | |
|    "id": "05546d99-afab-4565-8f30-f14e1426abcf",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Write the csv file:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 25,
 | |
|    "id": "185f7dde-20dc-44d8-9ab0-de41f9b5734d",
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-09-04T04:51:24.517172Z",
 | |
|      "iopub.status.busy": "2023-09-04T04:51:24.516616Z",
 | |
|      "iopub.status.idle": "2023-09-04T04:51:24.523471Z",
 | |
|      "shell.execute_reply": "2023-09-04T04:51:24.522769Z"
 | |
|     },
 | |
|     "tags": []
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "df.to_csv(\"data/data_out.csv\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "b6a79201-7c17-496c-a6a1-b8ecfd3dd1e8",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.9.17"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 5
 | |
| }
 | 
