Image Multimodal Search
=======================

This notebook shows how to carry out an image multimodal search with the LAVIS library (https://github.com/salesforce/LAVIS).

The first cell is only run on Google Colab and installs the ammico package (https://github.com/ssciwr/AMMICO).

After that, we can import ammico and read in the files given a folder path.
[1]:

# if running on google colab
# flake8-noqa-cell
import os

if "google.colab" in str(get_ipython()):
    # update python version
    # install setuptools
    # %pip install setuptools==61 -qqq
    # install ammico
    %pip install git+https://github.com/ssciwr/ammico.git -qqq
    # mount google drive for data and API key
    from google.colab import drive

    drive.mount("/content/drive")
[2]:

import ammico.utils as mutils
import ammico.multimodal_search as ms
[3]:

images = mutils.find_files(
    path="data/",
    limit=10,
)
[4]:

images

[4]:

{'102141_2_eng': {'filename': 'data/102141_2_eng.png'},
 '102730_eng': {'filename': 'data/102730_eng.png'},
 '106349S_por': {'filename': 'data/106349S_por.png'}}
[5]:

mydict = mutils.initialize_dict(images)
[6]:

mydict

[6]:

{'102141_2_eng': {'filename': '102141_2_eng'},
 '102730_eng': {'filename': '102730_eng'},
 '106349S_por': {'filename': '106349S_por'}}
Indexing and extracting features from images in selected folder
----------------------------------------------------------------

First you need to select a model. You can choose one of the following models:

- blip (https://github.com/salesforce/BLIP)
- blip2 (https://huggingface.co/docs/transformers/main/model_doc/blip-2)
- albef (https://github.com/salesforce/ALBEF)
- clip_base (https://github.com/openai/CLIP/blob/main/model-card.md)
- clip_vitl14 (https://github.com/mlfoundations/open_clip)
- clip_vitl14_336 (https://github.com/mlfoundations/open_clip)
[7]:

model_type = "blip"
# model_type = "blip2"
# model_type = "albef"
# model_type = "clip_base"
# model_type = "clip_vitl14"
# model_type = "clip_vitl14_336"
To process the loaded images with the selected model, run the code below:
[8]:

my_obj = ms.MultimodalSearch(mydict)
[9]:

my_obj.subdict

[9]:

{'102141_2_eng': {'filename': '102141_2_eng'},
 '102730_eng': {'filename': '102730_eng'},
 '106349S_por': {'filename': '106349S_por'}}
[10]:

(
    model,
    vis_processors,
    txt_processors,
    image_keys,
    image_names,
    features_image_stacked,
) = my_obj.parsing_images(
    model_type,
    path_to_save_tensors="data/",
)
(stderr: the bert-base-uncased tokenizer files and the 1.97 GB model checkpoint are downloaded.)

In this documentation build the example images were not available under the stored file names, so the call ended with:

FileNotFoundError: [Errno 2] No such file or directory: '102141_2_eng'
[11]:

features_image_stacked

NameError: name 'features_image_stacked' is not defined

(This and several of the following cells raise NameError in this documentation build because the feature extraction above did not complete.)
The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and the same model, so if you run this analysis once and save the tensors by passing a path with the keyword path_to_save_tensors, a file named <Number_of_images>_<model_name>_saved_features_image.pt will be placed there.

This will save you a lot of time if you want to analyse the same images with the same model but different queries. To run using the saved tensors, execute the code below, giving the path and name of the tensor file.
[12]:

# (
#     model,
#     vis_processors,
#     txt_processors,
#     image_keys,
#     image_names,
#     features_image_stacked,
# ) = my_obj.parsing_images(
#     model_type,
#     path_to_load_tensors="/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt",
# )
Here we had already processed an image folder with 5 images using the clip_base model, so it is enough to pass the name of the saved file, 5_clip_base_saved_features_image.pt, which contains the tensors of all images, as the keyword argument path_to_load_tensors.
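If you want this choice to be automatic, you can check for an existing tensor file first and only recompute the features when none is found. A minimal sketch, assuming the file lives in data/ and is named following the pattern above (this helper logic is not part of ammico):

from pathlib import Path

# assumed file name pattern: <number_of_images>_<model_type>_saved_features_image.pt
tensor_file = Path("data") / f"{len(mydict)}_{model_type}_saved_features_image.pt"

if tensor_file.exists():
    # reuse the previously computed image features
    outputs = my_obj.parsing_images(model_type, path_to_load_tensors=str(tensor_file))
else:
    # compute the features and store them for later reuse
    outputs = my_obj.parsing_images(model_type, path_to_save_tensors="data/")

(model, vis_processors, txt_processors, image_keys, image_names, features_image_stacked) = outputs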
Formulate your search queries
-----------------------------

Next, you need to formulate search queries. You can search either by image or by text, and you can pass a single query or several queries at once; the computational time should not differ much. The format of the queries is as follows:
[13]:

search_query3 = [
    {"text_input": "politician press conference"},
    {"text_input": "a world map"},
    {"text_input": "a dog"},
]
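The executed example above uses text queries only. To search by image instead, the query dictionary is assumed to take an "image" key pointing to the query image file; the key name and the path below are assumptions for illustration, not taken from an executed cell:

# hypothetical image query: the "image" key and the file path are assumptions
search_query_by_image = [
    {"image": "data/102141_2_eng.png"},  # find images similar to this example image
]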
You can filter your results in 3 different ways:

- filter_number_of_images limits the number of images found. That is, if filter_number_of_images = 10, only the 10 images that best match the query are shown; the ranks of the remaining images are set to None and their similarity values to 0.
- filter_val_limit discards images whose similarity value is below filter_val_limit. That is, if filter_val_limit = 0.2, all images with a similarity of less than 0.2 are discarded.
- filter_rel_error (a percentage) discards images whose relative deviation from the best match, 100 * abs(current_similarity_value - best_similarity_value_in_current_search) / best_similarity_value_in_current_search, exceeds filter_rel_error. That is, with filter_rel_error = 30, if the top-1 image has a similarity value of 0.5, all images with a similarity of less than 0.35 are discarded.

A sketch illustrating the latter two filters is shown right below; the executed cell that follows it uses filter_number_of_images.
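A minimal sketch of the same call using the other two filters (the parameter values are arbitrary examples, not recommendations):

# hedged sketch: filter by absolute similarity value and by relative error instead
similarity_filtered, sorted_lists_filtered = my_obj.multimodal_search(
    model,
    vis_processors,
    txt_processors,
    model_type,
    image_keys,
    features_image_stacked,
    search_query3,
    filter_val_limit=0.2,  # discard images with similarity below 0.2
    filter_rel_error=30,   # discard images more than 30% below the best match
)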
[14]:

similarity, sorted_lists = my_obj.multimodal_search(
    model,
    vis_processors,
    txt_processors,
    model_type,
    image_keys,
    features_image_stacked,
    search_query3,
    filter_number_of_images=20,
)

NameError: name 'model' is not defined
[15]:

similarity

NameError: name 'similarity' is not defined

[16]:

sorted_lists

NameError: name 'sorted_lists' is not defined
[17]:

mydict

[17]:

{'102141_2_eng': {'filename': '102141_2_eng'},
 '102730_eng': {'filename': '102730_eng'},
 '106349S_por': {'filename': '106349S_por'}}
After launching the multimodal_search function, the results of each query are added to the source dictionary.
[18]:

mydict["106349S_por"]

[18]:

{'filename': '106349S_por'}
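After a successful run, each image entry is expected to additionally hold one similarity value and one rank per query. A hedged sketch of inspecting such an entry, assuming the similarity is stored under the query text and the rank under "rank <query text>" (the key names are an assumption, since the search did not complete in this build):

# hedged sketch: assumed per-query keys added by multimodal_search
entry = mydict["106349S_por"]
query_text = "politician press conference"

print(entry.get(query_text))            # similarity value of this image for the query
print(entry.get("rank " + query_text))  # rank of this image for the query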
A dedicated function, show_results, presents the search results conveniently.
[19]:

my_obj.show_results(
    search_query3[0],
)

'Your search query: politician press conference'
'--------------------------------------------------'
'Results:'

KeyError: 'politician press conference'

(The KeyError again stems from the failed feature extraction in this build: no similarity values were stored for this query.)
Improve the search results
--------------------------

For even better results, a slightly different approach is available. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images, and it works only with text queries. You can choose between 3 models: "blip_base", "blip_large", "blip2_coco". If you get an Out of Memory error, try reducing the batch_size value (minimum 1), which is the number of images processed simultaneously; a retry sketch is shown after the reordering cell below. With the parameter need_grad_cam = True/False you can enable the calculation of a heat map for each processed image. The image_text_match_reordering function then calculates new similarity values and new ranks for each image, and the resulting values are added to the general dictionary.
[20]:

itm_model = "blip_base"
# itm_model = "blip_large"
# itm_model = "blip2_coco"
[21]:

itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(
    search_query3,
    itm_model,
    image_keys,
    sorted_lists,
    batch_size=1,
    need_grad_cam=True,
)

NameError: name 'image_keys' is not defined
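If the reordering runs out of GPU memory, the batch size can be lowered step by step. A minimal sketch of such a retry loop, assuming the call above otherwise succeeds (torch.cuda.OutOfMemoryError requires a recent PyTorch; older versions raise a plain RuntimeError):

import torch

# hedged sketch: retry the reordering with successively smaller batch sizes on OOM
batch_size = 8
while True:
    try:
        itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(
            search_query3,
            itm_model,
            image_keys,
            sorted_lists,
            batch_size=batch_size,
            need_grad_cam=True,
        )
        break
    except torch.cuda.OutOfMemoryError:
        if batch_size == 1:
            raise  # already at the minimum batch size, give up
        batch_size = max(1, batch_size // 2)
        torch.cuda.empty_cache()  # free cached memory before retrying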
Then, using the same output function, you can add the itm=True argument to output the new image order. You can also pass the image_gradcam_with_itm argument to output the heat maps of the processed images.
[22]:

my_obj.show_results(
    search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm
)

NameError: name 'image_gradcam_with_itm' is not defined
Save search results to csv
--------------------------

Convert the dictionary of dictionaries into a dictionary with lists:
[23]:

outdict = mutils.append_data_to_dict(mydict)
df = mutils.dump_df(outdict)
Check the dataframe:
[24]:

df.head(10)

[24]:

       filename
0  102141_2_eng
1    102730_eng
2   106349S_por
Write the csv file:
[25]:

df.to_csv("data/data_out.csv")
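To verify the export, the file can be read back with pandas (a small sketch; pandas is available here since the dataframe above is a pandas object):

import pandas as pd

# read the exported csv back in; the first column holds the saved index
df_check = pd.read_csv("data/data_out.csv", index_col=0)
print(df_check.head())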