зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-31 05:56:05 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			481 строка
		
	
	
		
			30 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			481 строка
		
	
	
		
			30 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE html>
 | |
| <html class="writer-html5" lang="en" >
 | |
| <head>
 | |
|   <meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
 | |
| 
 | |
|   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 | |
|   <title>Image summary and visual question answering — AMMICO 0.0.1 documentation</title>
 | |
|       <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
 | |
|       <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
 | |
|       <link rel="stylesheet" href="../_static/nbsphinx-code-cells.css" type="text/css" />
 | |
|   <!--[if lt IE 9]>
 | |
|     <script src="../_static/js/html5shiv.min.js"></script>
 | |
|   <![endif]-->
 | |
|   
 | |
|         <script src="../_static/jquery.js"></script>
 | |
|         <script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
 | |
|         <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
 | |
|         <script src="../_static/doctools.js"></script>
 | |
|         <script src="../_static/sphinx_highlight.js"></script>
 | |
|         <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
 | |
|         <script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
 | |
|         <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
 | |
|     <script src="../_static/js/theme.js"></script>
 | |
|     <link rel="index" title="Index" href="../genindex.html" />
 | |
|     <link rel="search" title="Search" href="../search.html" />
 | |
|     <link rel="next" title="Image Multimodal Search" href="Example%20multimodal.html" />
 | |
|     <link rel="prev" title="Notebook for text extraction on image" href="Example%20text.html" /> 
 | |
| </head>
 | |
| 
 | |
| <body class="wy-body-for-nav"> 
 | |
|   <div class="wy-grid-for-nav">
 | |
|     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
 | |
|       <div class="wy-side-scroll">
 | |
|         <div class="wy-side-nav-search" >
 | |
| 
 | |
|           
 | |
|           
 | |
|           <a href="../index.html" class="icon icon-home">
 | |
|             AMMICO
 | |
|           </a>
 | |
| <div role="search">
 | |
|   <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
 | |
|     <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
 | |
|     <input type="hidden" name="check_keywords" value="yes" />
 | |
|     <input type="hidden" name="area" value="default" />
 | |
|   </form>
 | |
| </div>
 | |
|         </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
 | |
|               <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
 | |
| <ul class="current">
 | |
| <li class="toctree-l1"><a class="reference internal" href="../readme_link.html">AMMICO - AI Media and Misinformation Content Analysis Tool</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20faces.html">Facial Expression recognition with DeepFace</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20text.html">Notebook for text extraction on image</a></li>
 | |
| <li class="toctree-l1 current"><a class="current reference internal" href="#">Image summary and visual question answering</a><ul>
 | |
| <li class="toctree-l2"><a class="reference internal" href="#Create-captions-for-images-and-directly-write-to-csv">Create captions for images and directly write to csv</a></li>
 | |
| <li class="toctree-l2"><a class="reference internal" href="#Manually-inspect-the-summaries">Manually inspect the summaries</a></li>
 | |
| <li class="toctree-l2"><a class="reference internal" href="#Generate-answers-to-free-form-questions-about-images-written-in-natural-language.">Generate answers to free-form questions about images written in natural language.</a></li>
 | |
| <li class="toctree-l2"><a class="reference internal" href="#Or-directly-analyze-for-further-processing">Or directly analyze for further processing</a></li>
 | |
| <li class="toctree-l2"><a class="reference internal" href="#Convert-to-dataframe-and-write-csv">Convert to dataframe and write csv</a></li>
 | |
| </ul>
 | |
| </li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20multimodal.html">Image Multimodal Search</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20colors.html">Color analysis of pictures</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20objects.html">Objects recognition</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="Example%20cropposts.html">Crop posts from social media posts images</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="../modules.html">AMMICO package modules</a></li>
 | |
| <li class="toctree-l1"><a class="reference internal" href="../license_link.html">License</a></li>
 | |
| </ul>
 | |
| 
 | |
|         </div>
 | |
|       </div>
 | |
|     </nav>
 | |
| 
 | |
|     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
 | |
|           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
 | |
|           <a href="../index.html">AMMICO</a>
 | |
|       </nav>
 | |
| 
 | |
|       <div class="wy-nav-content">
 | |
|         <div class="rst-content">
 | |
|           <div role="navigation" aria-label="Page navigation">
 | |
|   <ul class="wy-breadcrumbs">
 | |
|       <li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
 | |
|       <li class="breadcrumb-item active">Image summary and visual question answering</li>
 | |
|       <li class="wy-breadcrumbs-aside">
 | |
|             <a href="../_sources/notebooks/Example%20summary.ipynb.txt" rel="nofollow"> View page source</a>
 | |
|       </li>
 | |
|   </ul>
 | |
|   <hr/>
 | |
| </div>
 | |
|           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
 | |
|            <div itemprop="articleBody">
 | |
|              
 | |
|   <section id="Image-summary-and-visual-question-answering">
 | |
| <h1>Image summary and visual question answering<a class="headerlink" href="#Image-summary-and-visual-question-answering" title="Permalink to this heading"></a></h1>
 | |
| <p>This notebook shows how to generate image captions and use visual question answering with <a class="reference external" href="https://github.com/salesforce/LAVIS">LAVIS</a>.</p>
 | |
| <p>The first cell is only run on Google Colab and installs the <a class="reference external" href="https://github.com/ssciwr/AMMICO">ammico</a> package.</p>
 | |
| <p>After that, we can import <code class="docutils literal notranslate"><span class="pre">ammico</span></code> and read in the files given a folder path.</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># if running on google colab</span>
 | |
| <span class="c1"># flake8-noqa-cell</span>
 | |
| <span class="kn">import</span> <span class="nn">os</span>
 | |
| 
 | |
| <span class="k">if</span> <span class="s2">"google.colab"</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">get_ipython</span><span class="p">()):</span>
 | |
|     <span class="c1"># update python version</span>
 | |
|     <span class="c1"># install setuptools</span>
 | |
|     <span class="c1"># %pip install setuptools==61 -qqq</span>
 | |
|     <span class="c1"># install ammico</span>
 | |
|     <span class="o">%</span><span class="k">pip</span> install git+https://github.com/ssciwr/ammico.git -qqq
 | |
|     <span class="c1"># mount google drive for data and API key</span>
 | |
|     <span class="kn">from</span> <span class="nn">google.colab</span> <span class="kn">import</span> <span class="n">drive</span>
 | |
| 
 | |
|     <span class="n">drive</span><span class="o">.</span><span class="n">mount</span><span class="p">(</span><span class="s2">"/content/drive"</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[2]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">ammico</span>
 | |
| <span class="kn">from</span> <span class="nn">ammico</span> <span class="kn">import</span> <span class="n">utils</span> <span class="k">as</span> <span class="n">mutils</span>
 | |
| <span class="kn">from</span> <span class="nn">ammico</span> <span class="kn">import</span> <span class="n">display</span> <span class="k">as</span> <span class="n">mdisplay</span>
 | |
| <span class="kn">import</span> <span class="nn">ammico.summary</span> <span class="k">as</span> <span class="nn">sm</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[3]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># Here you need to provide the path to your google drive folder</span>
 | |
| <span class="c1"># or local folder containing the images</span>
 | |
| <span class="n">images</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">find_files</span><span class="p">(</span>
 | |
|     <span class="n">path</span><span class="o">=</span><span class="s2">"data/"</span><span class="p">,</span>
 | |
|     <span class="n">limit</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
 | |
| <span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">mydict</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">initialize_dict</span><span class="p">(</span><span class="n">images</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <section id="Create-captions-for-images-and-directly-write-to-csv">
 | |
| <h2>Create captions for images and directly write to csv<a class="headerlink" href="#Create-captions-for-images-and-directly-write-to-csv" title="Permalink to this heading"></a></h2>
 | |
| <p>Here you can choose between two models: “base” or “large”. This will generate the caption for each image and directly put the results in a dataframe. This dataframe can be exported as a csv file.</p>
 | |
| <p>The results are written into the column <code class="docutils literal notranslate"><span class="pre">const_image_summary</span></code> - this will always be the same result (as the same seed is always used). The column <code class="docutils literal notranslate"><span class="pre">3_non-deterministic</span> <span class="pre">summary</span></code> displays three different answers generated with different seeds; these will most likely differ when you run the analysis again.</p>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[5]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">obj</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">SummaryDetector</span><span class="p">(</span><span class="n">mydict</span><span class="p">)</span>
 | |
| <span class="n">summary_model</span><span class="p">,</span> <span class="n">summary_vis_processors</span> <span class="o">=</span> <span class="n">obj</span><span class="o">.</span><span class="n">load_model</span><span class="p">(</span><span class="n">model_type</span><span class="o">=</span><span class="s2">"base"</span><span class="p">)</span>
 | |
| <span class="c1"># summary_model, summary_vis_processors = mutils.load_model("large")</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt empty docutils container">
 | |
| </div>
 | |
| <div class="output_area stderr docutils container">
 | |
| <div class="highlight"><pre>
 | |
| 100%|██████████| 2.50G/2.50G [00:16<00:00, 165MB/s]
 | |
| 100%|██████████| 1.35G/1.35G [00:07<00:00, 199MB/s]
 | |
| </pre></div></div>
 | |
| </div>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">mydict</span><span class="p">:</span>
 | |
|     <span class="n">mydict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">SummaryDetector</span><span class="p">(</span><span class="n">mydict</span><span class="p">[</span><span class="n">key</span><span class="p">])</span><span class="o">.</span><span class="n">analyse_image</span><span class="p">(</span>
 | |
|         <span class="n">summary_model</span><span class="o">=</span><span class="n">summary_model</span><span class="p">,</span> <span class="n">summary_vis_processors</span><span class="o">=</span><span class="n">summary_vis_processors</span>
 | |
|     <span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt empty docutils container">
 | |
| </div>
 | |
| <div class="output_area docutils container">
 | |
| <div class="highlight"><pre>
 | |
| <span class="ansi-red-fg">---------------------------------------------------------------------------</span>
 | |
| <span class="ansi-red-fg">TypeError</span>                                 Traceback (most recent call last)
 | |
| Cell <span class="ansi-green-fg">In[6], line 2</span>
 | |
| <span class="ansi-green-intense-fg ansi-bold">      1</span> <span class="ansi-bold" style="color: rgb(0,135,0)">for</span> key <span class="ansi-bold" style="color: rgb(175,0,255)">in</span> mydict:
 | |
| <span class="ansi-green-fg">----> 2</span>     mydict[key] <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">sm</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">SummaryDetector</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">mydict</span><span class="ansi-yellow-bg">[</span><span class="ansi-yellow-bg">key</span><span class="ansi-yellow-bg">]</span><span class="ansi-yellow-bg">)</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">analyse_image</span><span class="ansi-yellow-bg">(</span>
 | |
| <span class="ansi-green-intense-fg ansi-bold">      3</span> <span class="ansi-yellow-bg">        </span><span class="ansi-yellow-bg">summary_model</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">summary_model</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">summary_vis_processors</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg">summary_vis_processors</span>
 | |
| <span class="ansi-green-intense-fg ansi-bold">      4</span> <span class="ansi-yellow-bg">    </span><span class="ansi-yellow-bg">)</span>
 | |
| 
 | |
| <span class="ansi-red-fg">TypeError</span>: analyse_image() got an unexpected keyword argument 'summary_model'
 | |
| </pre></div></div>
 | |
| </div>
 | |
| <p>Convert the dictionary of dictionaries into a dictionary with lists:</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[7]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">outdict</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">append_data_to_dict</span><span class="p">(</span><span class="n">mydict</span><span class="p">)</span>
 | |
| <span class="n">df</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">dump_df</span><span class="p">(</span><span class="n">outdict</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <p>Check the dataframe:</p>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="output_area rendered_html docutils container">
 | |
| <div>
 | |
| <style scoped>
 | |
|     .dataframe tbody tr th:only-of-type {
 | |
|         vertical-align: middle;
 | |
|     }
 | |
| 
 | |
|     .dataframe tbody tr th {
 | |
|         vertical-align: top;
 | |
|     }
 | |
| 
 | |
|     .dataframe thead th {
 | |
|         text-align: right;
 | |
|     }
 | |
| </style>
 | |
| <table border="1" class="dataframe">
 | |
|   <thead>
 | |
|     <tr style="text-align: right;">
 | |
|       <th></th>
 | |
|       <th>filename</th>
 | |
|     </tr>
 | |
|   </thead>
 | |
|   <tbody>
 | |
|     <tr>
 | |
|       <th>0</th>
 | |
|       <td>data/106349S_por.png</td>
 | |
|     </tr>
 | |
|     <tr>
 | |
|       <th>1</th>
 | |
|       <td>data/102141_2_eng.png</td>
 | |
|     </tr>
 | |
|     <tr>
 | |
|       <th>2</th>
 | |
|       <td>data/102730_eng.png</td>
 | |
|     </tr>
 | |
|   </tbody>
 | |
| </table>
 | |
| </div></div>
 | |
| </div>
 | |
| <p>Write the csv file:</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[9]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s2">"data_out.csv"</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| </section>
 | |
| <section id="Manually-inspect-the-summaries">
 | |
| <h2>Manually inspect the summaries<a class="headerlink" href="#Manually-inspect-the-summaries" title="Permalink to this heading"></a></h2>
 | |
| <p>To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this manual inspection.</p>
 | |
| <p><code class="docutils literal notranslate"><span class="pre">const_image_summary</span></code> - the permanent summary, which does not change from run to run (analyse_image).</p>
 | |
| <p><code class="docutils literal notranslate"><span class="pre">3_non-deterministic</span> <span class="pre">summary</span></code> - 3 different example summaries that change from run to run (analyse_image).</p>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[10]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">analysis_explorer</span> <span class="o">=</span> <span class="n">mdisplay</span><span class="o">.</span><span class="n">AnalysisExplorer</span><span class="p">(</span><span class="n">mydict</span><span class="p">,</span> <span class="n">identify</span><span class="o">=</span><span class="s2">"summary"</span><span class="p">)</span>
 | |
| <span class="n">analysis_explorer</span><span class="o">.</span><span class="n">run_server</span><span class="p">(</span><span class="n">port</span><span class="o">=</span><span class="mi">8055</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt empty docutils container">
 | |
| </div>
 | |
| <div class="output_area docutils container">
 | |
| <div class="highlight"><pre>
 | |
| <span class="ansi-red-fg">---------------------------------------------------------------------------</span>
 | |
| <span class="ansi-red-fg">TypeError</span>                                 Traceback (most recent call last)
 | |
| Cell <span class="ansi-green-fg">In[10], line 1</span>
 | |
| <span class="ansi-green-fg">----> 1</span> analysis_explorer <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">mdisplay</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">AnalysisExplorer</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">mydict</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">identify</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">summary</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
 | |
| <span class="ansi-green-intense-fg ansi-bold">      2</span> analysis_explorer<span style="color: rgb(98,98,98)">.</span>run_server(port<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">8055</span>)
 | |
| 
 | |
| <span class="ansi-red-fg">TypeError</span>: __init__() got an unexpected keyword argument 'identify'
 | |
| </pre></div></div>
 | |
| </div>
 | |
| </section>
 | |
| <section id="Generate-answers-to-free-form-questions-about-images-written-in-natural-language.">
 | |
| <h2>Generate answers to free-form questions about images written in natural language.<a class="headerlink" href="#Generate-answers-to-free-form-questions-about-images-written-in-natural-language." title="Permalink to this heading"></a></h2>
 | |
| <p>Set the list of questions as a list of strings:</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[11]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">list_of_questions</span> <span class="o">=</span> <span class="p">[</span>
 | |
|     <span class="s2">"How many persons on the picture?"</span><span class="p">,</span>
 | |
|     <span class="s2">"Are there any politicians in the picture?"</span><span class="p">,</span>
 | |
|     <span class="s2">"Does the picture show something from medicine?"</span><span class="p">,</span>
 | |
| <span class="p">]</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <p>Explore the analysis using the interface:</p>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[12]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">analysis_explorer</span> <span class="o">=</span> <span class="n">mdisplay</span><span class="o">.</span><span class="n">AnalysisExplorer</span><span class="p">(</span><span class="n">mydict</span><span class="p">,</span> <span class="n">identify</span><span class="o">=</span><span class="s2">"summary"</span><span class="p">)</span>
 | |
| <span class="n">analysis_explorer</span><span class="o">.</span><span class="n">run_server</span><span class="p">(</span><span class="n">port</span><span class="o">=</span><span class="mi">8055</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt empty docutils container">
 | |
| </div>
 | |
| <div class="output_area docutils container">
 | |
| <div class="highlight"><pre>
 | |
| <span class="ansi-red-fg">---------------------------------------------------------------------------</span>
 | |
| <span class="ansi-red-fg">TypeError</span>                                 Traceback (most recent call last)
 | |
| Cell <span class="ansi-green-fg">In[12], line 1</span>
 | |
| <span class="ansi-green-fg">----> 1</span> analysis_explorer <span style="color: rgb(98,98,98)">=</span> <span class="ansi-yellow-bg">mdisplay</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">.</span><span class="ansi-yellow-bg">AnalysisExplorer</span><span class="ansi-yellow-bg">(</span><span class="ansi-yellow-bg">mydict</span><span class="ansi-yellow-bg">,</span><span class="ansi-yellow-bg"> </span><span class="ansi-yellow-bg">identify</span><span class="ansi-yellow-bg" style="color: rgb(98,98,98)">=</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">summary</span><span class="ansi-yellow-bg" style="color: rgb(175,0,0)">"</span><span class="ansi-yellow-bg">)</span>
 | |
| <span class="ansi-green-intense-fg ansi-bold">      2</span> analysis_explorer<span style="color: rgb(98,98,98)">.</span>run_server(port<span style="color: rgb(98,98,98)">=</span><span style="color: rgb(98,98,98)">8055</span>)
 | |
| 
 | |
| <span class="ansi-red-fg">TypeError</span>: __init__() got an unexpected keyword argument 'identify'
 | |
| </pre></div></div>
 | |
| </div>
 | |
| </section>
 | |
| <section id="Or-directly-analyze-for-further-processing">
 | |
| <h2>Or directly analyze for further processing<a class="headerlink" href="#Or-directly-analyze-for-further-processing" title="Permalink to this heading"></a></h2>
 | |
| <p>Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded.</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[13]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">mydict</span><span class="p">:</span>
 | |
|     <span class="n">mydict</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">sm</span><span class="o">.</span><span class="n">SummaryDetector</span><span class="p">(</span><span class="n">mydict</span><span class="p">[</span><span class="n">key</span><span class="p">])</span><span class="o">.</span><span class="n">analyse_questions</span><span class="p">(</span><span class="n">list_of_questions</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| </section>
 | |
| <section id="Convert-to-dataframe-and-write-csv">
 | |
| <h2>Convert to dataframe and write csv<a class="headerlink" href="#Convert-to-dataframe-and-write-csv" title="Permalink to this heading"></a></h2>
 | |
| <p>These steps are required to convert the dictionary of dictionaries into a dictionary with lists, which can be converted into a pandas dataframe and exported to a csv file.</p>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[14]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">outdict2</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">append_data_to_dict</span><span class="p">(</span><span class="n">mydict</span><span class="p">)</span>
 | |
| <span class="n">df2</span> <span class="o">=</span> <span class="n">mutils</span><span class="o">.</span><span class="n">dump_df</span><span class="p">(</span><span class="n">outdict2</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nbinput docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[15]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">df2</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nboutput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[15]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="output_area rendered_html docutils container">
 | |
| <div>
 | |
| <style scoped>
 | |
|     .dataframe tbody tr th:only-of-type {
 | |
|         vertical-align: middle;
 | |
|     }
 | |
| 
 | |
|     .dataframe tbody tr th {
 | |
|         vertical-align: top;
 | |
|     }
 | |
| 
 | |
|     .dataframe thead th {
 | |
|         text-align: right;
 | |
|     }
 | |
| </style>
 | |
| <table border="1" class="dataframe">
 | |
|   <thead>
 | |
|     <tr style="text-align: right;">
 | |
|       <th></th>
 | |
|       <th>filename</th>
 | |
|       <th>How many persons on the picture?</th>
 | |
|       <th>Are there any politicians in the picture?</th>
 | |
|       <th>Does the picture show something from medicine?</th>
 | |
|     </tr>
 | |
|   </thead>
 | |
|   <tbody>
 | |
|     <tr>
 | |
|       <th>0</th>
 | |
|       <td>data/106349S_por.png</td>
 | |
|       <td>1</td>
 | |
|       <td>yes</td>
 | |
|       <td>yes</td>
 | |
|     </tr>
 | |
|     <tr>
 | |
|       <th>1</th>
 | |
|       <td>data/102141_2_eng.png</td>
 | |
|       <td>1</td>
 | |
|       <td>no</td>
 | |
|       <td>yes</td>
 | |
|     </tr>
 | |
|     <tr>
 | |
|       <th>2</th>
 | |
|       <td>data/102730_eng.png</td>
 | |
|       <td>2</td>
 | |
|       <td>no</td>
 | |
|       <td>yes</td>
 | |
|     </tr>
 | |
|   </tbody>
 | |
| </table>
 | |
| </div></div>
 | |
| </div>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[16]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">df2</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s2">"data_out2.csv"</span><span class="p">)</span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| <div class="nbinput nblast docutils container">
 | |
| <div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
 | |
| </pre></div>
 | |
| </div>
 | |
| <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>
 | |
| </pre></div>
 | |
| </div>
 | |
| </div>
 | |
| </section>
 | |
| </section>
 | |
| 
 | |
| 
 | |
|            </div>
 | |
|           </div>
 | |
|           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
 | |
|         <a href="Example%20text.html" class="btn btn-neutral float-left" title="Notebook for text extraction on image" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
 | |
|         <a href="Example%20multimodal.html" class="btn btn-neutral float-right" title="Image Multimodal Search" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
 | |
|     </div>
 | |
| 
 | |
|   <hr/>
 | |
| 
 | |
|   <div role="contentinfo">
 | |
|     <p>© Copyright 2022, Scientific Software Center, Heidelberg University.</p>
 | |
|   </div>
 | |
| 
 | |
|   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
 | |
|     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
 | |
|     provided by <a href="https://readthedocs.org">Read the Docs</a>.
 | |
|    
 | |
| 
 | |
| </footer>
 | |
|         </div>
 | |
|       </div>
 | |
|     </section>
 | |
|   </div>
 | |
|   <script>
 | |
|       jQuery(function () {
 | |
|           SphinxRtdTheme.Navigation.enable(true);
 | |
|       });
 | |
|   </script> 
 | |
| 
 | |
| </body>
 | |
| </html> | 
