<!DOCTYPE html>
|
||
<html class="writer-html5" lang="en" data-content_root="../">
|
||
<head>
|
||
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>AMMICO Demonstration Notebook — AMMICO 0.2.2 documentation</title>
|
||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=b86133f3" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=e59714d7" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/nbsphinx-code-cells.css?v=2aa19091" />
|
||
|
||
|
||
<script src="../_static/jquery.js?v=5d32c60e"></script>
|
||
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||
<script src="../_static/documentation_options.js?v=000c92bf"></script>
|
||
<script src="../_static/doctools.js?v=9bcbadda"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
|
||
<script src="../_static/js/theme.js"></script>
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<link rel="next" title="AMMICO package modules" href="../modules.html" />
|
||
<link rel="prev" title="Instructions how to generate and enable a google Cloud Vision API key" href="../create_API_key_link.html" />
|
||
</head>
|
||
|
||
<body class="wy-body-for-nav">
|
||
<div class="wy-grid-for-nav">
|
||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||
<div class="wy-side-scroll">
|
||
<div class="wy-side-nav-search" >
|
||
|
||
|
||
|
||
<a href="../index.html" class="icon icon-home">
|
||
AMMICO
|
||
</a>
|
||
<div role="search">
|
||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||
<ul class="current">
|
||
<li class="toctree-l1"><a class="reference internal" href="../readme_link.html">AMMICO - AI-based Media and Misinformation Content Analysis Tool</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../faq_link.html">FAQ</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../create_API_key_link.html">Instructions how to generate and enable a google Cloud Vision API key</a></li>
|
||
<li class="toctree-l1 current"><a class="current reference internal" href="#">AMMICO Demonstration Notebook</a><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Use-a-test-dataset">Use a test dataset</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Import-the-ammico-package">Import the ammico package</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="#Step-0:-Create-and-set-a-Google-Cloud-Vision-Key">Step 0: Create and set a Google Cloud Vision Key</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="#Step-1:-Read-your-data-into-AMMICO">Step 1: Read your data into AMMICO</a><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Step-2:-Inspect-the-input-files-using-the-graphical-user-interface">Step 2: Inspect the input files using the graphical user interface</a><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Ethical-disclosure-statement">Ethical disclosure statement</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Privacy-disclosure-statement">Privacy disclosure statement</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Step-3:-Analyze-all-images">Step 3: Analyze all images</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Step-4:-Convert-analysis-output-to-pandas-dataframe-and-write-csv">Step 4: Convert analysis output to pandas dataframe and write csv</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Read-in-a-csv-file-containing-text-and-translating/analysing-the-text">Read in a csv file containing text and translating/analysing the text</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="#The-detector-modules">The detector modules</a><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Image-summary-and-query">Image summary and query</a><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="#BLIP2-models">BLIP2 models</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Detection-of-faces-and-facial-expression-analysis">Detection of faces and facial expression analysis</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Image-Multimodal-Search">Image Multimodal Search</a><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Indexing-and-extracting-features-from-images-in-selected-folder">Indexing and extracting features from images in selected folder</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Formulate-your-search-queries">Formulate your search queries</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Improve-the-search-results">Improve the search results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="#Save-search-results-to-csv">Save search results to csv</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Color-analysis-of-pictures">Color analysis of pictures</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="#Further-detector-modules">Further detector modules</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../modules.html">AMMICO package modules</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../license_link.html">License</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||
<a href="../index.html">AMMICO</a>
|
||
</nav>
|
||
|
||
<div class="wy-nav-content">
|
||
<div class="rst-content">
|
||
<div role="navigation" aria-label="Page navigation">
|
||
<ul class="wy-breadcrumbs">
|
||
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||
<li class="breadcrumb-item active">AMMICO Demonstration Notebook</li>
|
||
<li class="wy-breadcrumbs-aside">
|
||
<a href="https://github.com/ssciwr/AMMICO/blob/main/docs/source/notebooks/DemoNotebook_ammico.ipynb" class="fa fa-github"> Edit on GitHub</a>
|
||
</li>
|
||
</ul>
|
||
<hr/>
|
||
</div>
|
||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||
<div itemprop="articleBody">
|
||
|
||
<section id="AMMICO-Demonstration-Notebook">
|
||
<h1>AMMICO Demonstration Notebook<a class="headerlink" href="#AMMICO-Demonstration-Notebook" title="Link to this heading"></a></h1>
|
||
<p>With ammico, you can analyze text on images and image content at the same time. This is a demonstration notebook to showcase the capabilities of ammico. You can run this notebook on Google Colab or locally / on your own HPC resource. The analysis can be quite slow on the Google Colab default runtime. For production data processing, it is recommended to run the analysis locally on a GPU-supported machine. You can also make use of the Colab GPU runtime, or purchase additional runtime. However, Google Colab comes with pre-installed libraries that can lead to dependency conflicts. The setup on Google Colab changes frequently, so it is only ensured that this demonstration notebook runs on the default runtime.</p>
|
||
<div class="line-block">
|
||
<div class="line">This first cell only runs on google colab; on all other machines, you need to create a conda environment first and install ammico from the Python Package Index using</div>
|
||
<div class="line"><code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">ammico</span></code></div>
|
||
<div class="line">Alternatively you can install the development version from the GitHub repository</div>
|
||
<div class="line"><code class="docutils literal notranslate"><span class="pre">pip</span> <span class="pre">install</span> <span class="pre">git+https://github.com/ssciwr/AMMICO.git</span></code></div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># if running on google colab\
|
||
# PLEASE RUN THIS ONLY AS CPU RUNTIME
|
||
# for a GPU runtime, there are conflicts with pre-installed packages -
|
||
# you first need to uninstall them (prepare a clean environment with no pre-installs) and then install ammico
|
||
# flake8-noqa-cell
|
||
|
||
if "google.colab" in str(get_ipython()):
|
||
# update python version
|
||
# install setuptools
|
||
# %pip install setuptools==61 -qqq
|
||
# uninstall some pre-installed packages due to incompatibility
|
||
%pip uninstall --yes tensorflow-probability dopamine-rl lida pandas-gbq torchaudio torchdata torchtext orbax-checkpoint flex-y jax jaxlib -qqq
|
||
# install ammico
|
||
%pip install git+https://github.com/ssciwr/ammico.git -qqq
|
||
# install older version of jax to support transformers use of diffusers
|
||
# mount google drive for data and API key
|
||
from google.colab import drive
|
||
|
||
drive.mount("/content/drive")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<section id="Use-a-test-dataset">
|
||
<h2>Use a test dataset<a class="headerlink" href="#Use-a-test-dataset" title="Link to this heading"></a></h2>
|
||
<p>You can download this dataset for test purposes. Skip this step if you use your own data. If the data set on Hugging Face is gated or private, Hugging Face will ask you for a login token. However, for the default dataset in this notebook you do not need to provide one.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>from datasets import load_dataset
|
||
from pathlib import Path
|
||
|
||
# If the dataset is gated/private, make sure you have run huggingface-cli login
|
||
dataset = load_dataset("iulusoy/test-images")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Next you need to provide a path for the saved images - a folder where the data is stored locally. This directory is automatically created if it does not exist.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>data_path = "./data-test"
|
||
data_path = Path(data_path)
|
||
data_path.mkdir(parents=True, exist_ok=True)
|
||
# now save the files from the Huggingface dataset as images into the data_path folder
|
||
for i, image in enumerate(dataset["train"]["image"]):
|
||
filename = "img" + str(i) + ".png"
|
||
image.save(data_path / filename)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Import-the-ammico-package">
|
||
<h2>Import the ammico package<a class="headerlink" href="#Import-the-ammico-package" title="Link to this heading"></a></h2>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># jax also sometimes leads to problems on google colab
|
||
# if this is the case, try restarting the kernel and executing this
|
||
# and the above two code cells again
|
||
import ammico
|
||
|
||
# for displaying a progress bar
|
||
from tqdm import tqdm
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Sometimes you may need to restart the session after installing the correct versions of the packages, because <code class="docutils literal notranslate"><span class="pre">Tensorflow</span></code> and the <code class="docutils literal notranslate"><span class="pre">EmotionDetector</span></code> may not work and give an error. You can check this by running the following code:</p>
|
||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>import tensorflow as tf
|
||
tf.ones([2, 2])
|
||
</pre></div>
|
||
</div>
|
||
<p>If this code generates an error, you need to restart the session: please click <code class="docutils literal notranslate"><span class="pre">Runtime</span></code> -> <code class="docutils literal notranslate"><span class="pre">Restart</span> <span class="pre">session</span></code> and rerun the notebook. All required packages will already be installed, so the execution will be very fast.</p>
|
||
</section>
|
||
</section>
|
||
<section id="Step-0:-Create-and-set-a-Google-Cloud-Vision-Key">
|
||
<h1>Step 0: Create and set a Google Cloud Vision Key<a class="headerlink" href="#Step-0:-Create-and-set-a-Google-Cloud-Vision-Key" title="Link to this heading"></a></h1>
|
||
<p>Please note that for the <a class="reference external" href="https://cloud.google.com/vision/docs/setup">Google Cloud Vision API</a> (the TextDetector class) you need to set a key in order to process the images. A key is generated following <a class="reference internal" href="../set_up_credentials.html"><span class="doc">these instructions</span></a>. This key is ideally set as an environment variable, for example using</p>
|
||
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>os.environ[
|
||
"GOOGLE_APPLICATION_CREDENTIALS"
|
||
] = "/content/drive/MyDrive/misinformation-data/misinformation-campaign-981aa55a3b13.json"
|
||
</pre></div>
|
||
</div>
|
||
<p>where you place the key on your Google Drive if running on colab, or place it in a local folder on your machine.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/drive/MyDrive/misinformation-data/misinformation-campaign-981aa55a3b13.json"
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Step-1:-Read-your-data-into-AMMICO">
|
||
<h1>Step 1: Read your data into AMMICO<a class="headerlink" href="#Step-1:-Read-your-data-into-AMMICO" title="Link to this heading"></a></h1>
|
||
<p>The ammico package reads in one or several input files given in a folder for processing. The user can select to read in all image files in a folder, to include subfolders via the <code class="docutils literal notranslate"><span class="pre">recursive</span></code> option, and can select the file extension that should be considered (for example, only “jpg” files, or both “jpg” and “png” files). For reading in the files, the ammico function <code class="docutils literal notranslate"><span class="pre">find_files</span></code> is used, with optional keywords:</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>input key</p></th>
|
||
<th class="head"><p>input type</p></th>
|
||
<th class="head"><p>possible input values</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">path</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the directory containing the image files (defaults to the location set by environment variable <code class="docutils literal notranslate"><span class="pre">AMMICO_DATA_HOME</span></code>)</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">pattern</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str|list</span></code></p></td>
|
||
<td><p>the file extensions to consider (defaults to “png”, “jpg”, “jpeg”, “gif”, “webp”, “avif”, “tiff”)</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">recursive</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">bool</span></code></p></td>
|
||
<td><p>include subdirectories recursively (defaults to <code class="docutils literal notranslate"><span class="pre">True</span></code>)</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">limit</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">int</span></code></p></td>
|
||
<td><p>maximum number of files to read (defaults to <code class="docutils literal notranslate"><span class="pre">20</span></code>, for all images set to <code class="docutils literal notranslate"><span class="pre">None</span></code> or <code class="docutils literal notranslate"><span class="pre">-1</span></code>)</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">random_seed</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">int</span></code></p></td>
|
||
<td><p>the random seed for shuffling the images; relevant when only a few images are read and the selection should be reproducible (defaults to <code class="docutils literal notranslate"><span class="pre">None</span></code>)</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
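<p>For example, the optional keywords can be combined as in the following minimal sketch (the values shown here are purely illustrative, not defaults):</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>image_dict = ammico.find_files(
    path="data/",            # or rely on the AMMICO_DATA_HOME environment variable
    pattern=["png", "jpg"],  # only consider these file extensions
    recursive=True,          # include subdirectories
    limit=None,              # read all images instead of only the first 20
)
</pre></div>
</div>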
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">find_files</span></code> function returns a nested dictionary that contains the file ids and the paths to the files, and is otherwise empty. This dictionary is filled step by step with more data as each detector class is run on the data (see below).</p>
|
||
<p>If you downloaded the test dataset above, you can directly provide the path you already set for the test directory, <code class="docutils literal notranslate"><span class="pre">data_path</span></code>. The below cell is already set up for the test dataset.</p>
|
||
<p>If you use your own dataset, you need to toggle the path and provide the directory where you have saved your data.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_dict = ammico.find_files(
|
||
# path="/content/drive/MyDrive/misinformation-data/",
|
||
path=str(data_path),
|
||
limit=15,
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<section id="Step-2:-Inspect-the-input-files-using-the-graphical-user-interface">
|
||
<h2>Step 2: Inspect the input files using the graphical user interface<a class="headerlink" href="#Step-2:-Inspect-the-input-files-using-the-graphical-user-interface" title="Link to this heading"></a></h2>
|
||
<p>A Dash user interface is provided to select the most suitable options for the analysis, before running a complete analysis on the whole data set. The options for each detector module are explained below in the corresponding sections; for example, different models can be selected that will provide slightly different results. This way, the user can interactively explore which settings provide the most accurate results. In the interface, the nested <code class="docutils literal notranslate"><span class="pre">image_dict</span></code> is passed through the <code class="docutils literal notranslate"><span class="pre">AnalysisExplorer</span></code> class. The interface is run on a specific port which is passed using the <code class="docutils literal notranslate"><span class="pre">port</span></code> keyword; if the port is already in use, an error message will be returned, in which case the user should select a different port number. The interface opens a Dash app inside the Jupyter notebook and allows selection of the input file in the top left dropdown menu, as well as selection of the detector type in the top right, with options for each detector type as explained below. The output of the detector is shown directly to the right of the image. This way, the user can directly inspect how updating the options for each detector changes the computed results, and find the best settings for a production run.</p>
|
||
<section id="Ethical-disclosure-statement">
|
||
<h3>Ethical disclosure statement<a class="headerlink" href="#Ethical-disclosure-statement" title="Link to this heading"></a></h3>
|
||
<p>If you want to run an analysis using the EmotionDetector detector type, you first have to respond to an ethical disclosure statement. This disclosure statement ensures that you only use the full capabilities of the EmotionDetector after you have been made aware of its shortcomings.</p>
|
||
<p>For this, answer “yes” or “no” to the below prompt. This will set an environment variable with the name given in <code class="docutils literal notranslate"><span class="pre">accept_disclosure</span></code>. To re-run the disclosure prompt, unset the variable by uncommenting the line <code class="docutils literal notranslate"><span class="pre">os.environ.pop(accept_disclosure,</span> <span class="pre">None)</span></code>. To permanently set this environment variable, add it to your shell via your <code class="docutils literal notranslate"><span class="pre">.profile</span></code> or <code class="docutils literal notranslate"><span class="pre">.bashrc</span></code> file.</p>
|
||
<p>If the disclosure statement is accepted, the EmotionDetector will perform age, gender and race/ethnicity classification depending on the provided thresholds. If the disclosure is rejected, only the presence of faces and emotion (if not wearing a mask) is detected.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># respond to the disclosure statement
|
||
# this will set an environment variable for you
|
||
# if you do not want to re-accept the disclosure every time, you can set this environment variable in your shell
|
||
# to unset the environment variable and re-run the disclosure prompt, uncomment the line below
|
||
accept_disclosure = "DISCLOSURE_AMMICO"
|
||
# os.environ.pop(accept_disclosure, None)
|
||
_ = ammico.ethical_disclosure(accept_disclosure=accept_disclosure)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Privacy-disclosure-statement">
|
||
<h3>Privacy disclosure statement<a class="headerlink" href="#Privacy-disclosure-statement" title="Link to this heading"></a></h3>
|
||
<p>If you want to run an analysis using the TextDetector detector type, you first have to respond to a privacy disclosure statement. This disclosure statement ensures that you are aware that your data will be sent to Google Cloud Vision servers for analysis.</p>
|
||
<p>For this, answer “yes” or “no” to the below prompt. This will set an environment variable with the name given in <code class="docutils literal notranslate"><span class="pre">accept_privacy</span></code>. To re-run the disclosure prompt, unset the variable by uncommenting the line <code class="docutils literal notranslate"><span class="pre">os.environ.pop(accept_privacy,</span> <span class="pre">None)</span></code>. To permanently set this environment variable, add it to your shell via your <code class="docutils literal notranslate"><span class="pre">.profile</span></code> or <code class="docutils literal notranslate"><span class="pre">.bashrc</span></code> file.</p>
|
||
<p>If the privacy disclosure statement is accepted, the TextDetector will perform the text extraction, translation and if selected, analysis. If the privacy disclosure is rejected, no text processing will be carried out and you cannot use the TextDetector.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># respond to the privacy disclosure statement
|
||
# this will set an environment variable for you
|
||
# if you do not want to re-accept the privacy disclosure every time, you can set this environment variable in your shell
|
||
# to unset the environment variable and re-run the disclosure prompt, uncomment the line below
|
||
accept_privacy = "PRIVACY_AMMICO"
|
||
# os.environ.pop(accept_privacy, None)
|
||
_ = ammico.privacy_disclosure(accept_privacy=accept_privacy)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>analysis_explorer = ammico.AnalysisExplorer(image_dict)
|
||
analysis_explorer.run_server(port=8055)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="Step-3:-Analyze-all-images">
|
||
<h2>Step 3: Analyze all images<a class="headerlink" href="#Step-3:-Analyze-all-images" title="Link to this heading"></a></h2>
|
||
<p>The analysis can be run in production on all images in the data set. Depending on the size of the data set and the computing resources available, this can take some time.</p>
|
||
<p>It is also possible to set a dump file <code class="docutils literal notranslate"><span class="pre">dump_file</span></code> in order to save the calculated data after every <code class="docutils literal notranslate"><span class="pre">dump_every</span></code> images.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># dump file name
|
||
dump_file = "dump_file.csv"
|
||
# dump every N images
|
||
dump_every = 10
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The desired detector modules are called sequentially in any order, for example the <code class="docutils literal notranslate"><span class="pre">EmotionDetector</span></code>:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># set the thresholds for the emotion detection
|
||
emotion_threshold = 50 # this is the default value for the detection confidence
|
||
# the lowest possible value is 0
|
||
# the highest possible value is 100
|
||
race_threshold = 50
|
||
gender_threshold = 50
|
||
for num, key in tqdm(
|
||
enumerate(image_dict.keys()), total=len(image_dict)
|
||
): # loop through all images
|
||
image_dict[key] = ammico.EmotionDetector(
|
||
image_dict[key],
|
||
emotion_threshold=emotion_threshold,
|
||
race_threshold=race_threshold,
|
||
gender_threshold=gender_threshold,
|
||
).analyse_image() # analyse image with EmotionDetector and update dict
|
||
if (
|
||
num % dump_every == 0 or num == len(image_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p><code class="docutils literal notranslate"><span class="pre">TextDetector</span></code>:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for num, key in tqdm(
|
||
enumerate(image_dict.keys()), total=len(image_dict)
|
||
): # loop through all images
|
||
image_dict[key] = ammico.TextDetector(
|
||
image_dict[key], analyse_text=True
|
||
    ).analyse_image() # analyse image with TextDetector and update dict
|
||
|
||
if (
|
||
        num % dump_every == 0 or num == len(image_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>For the computationally demanding <code class="docutils literal notranslate"><span class="pre">SummaryDetector</span></code>, it is best to initialize the model first and then analyze each image while passing the model explicitly. This can be done in a separate loop or in the same loop as for text and emotion detection.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># initialize the models
|
||
image_summary_detector = ammico.SummaryDetector(
|
||
subdict=image_dict, analysis_type="summary", model_type="base"
|
||
)
|
||
|
||
# run the analysis without having to re-initialize the model
|
||
for num, key in tqdm(
|
||
enumerate(image_dict.keys()), total=len(image_dict)
|
||
): # loop through all images
|
||
image_dict[key] = image_summary_detector.analyse_image(
|
||
subdict=image_dict[key], analysis_type="summary"
|
||
) # analyse image with SummaryDetector and update dict
|
||
|
||
if (
|
||
        num % dump_every == 0 or num == len(image_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Or you can run all detectors in one loop, for example:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># initialize the models
|
||
image_summary_detector = ammico.SummaryDetector(
|
||
subdict=image_dict, analysis_type="summary", model_type="base"
|
||
)
|
||
|
||
for num, key in tqdm(
|
||
enumerate(image_dict.keys()), total=len(image_dict)
|
||
): # loop through all images
|
||
image_dict[key] = ammico.EmotionDetector(
|
||
image_dict[key]
|
||
).analyse_image() # analyse image with EmotionDetector and update dict
|
||
image_dict[key] = ammico.TextDetector(
|
||
image_dict[key], analyse_text=True
|
||
).analyse_image() # analyse image with TextDetector and update dict
|
||
image_dict[key] = image_summary_detector.analyse_image(
|
||
subdict=image_dict[key], analysis_type="summary"
|
||
) # analyse image with SummaryDetector and update dict
|
||
|
||
if (
|
||
        num % dump_every == 0 or num == len(image_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The nested dictionary will be updated from containing only the file ids and paths to the image files, to containing all calculated image features.</p>
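<p>As a quick check, you can list the keys that have been added for one of the images (a minimal sketch; the exact keys depend on which detectors were run):</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span># pick the first file id and list the keys added by the detectors
first_key = next(iter(image_dict))
print(image_dict[first_key].keys())
</pre></div>
</div>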
|
||
</section>
|
||
<section id="Step-4:-Convert-analysis-output-to-pandas-dataframe-and-write-csv">
|
||
<h2>Step 4: Convert analysis output to pandas dataframe and write csv<a class="headerlink" href="#Step-4:-Convert-analysis-output-to-pandas-dataframe-and-write-csv" title="Link to this heading"></a></h2>
|
||
<p>The content of the nested dictionary can then conveniently be converted into a pandas dataframe for further analysis in Python, or be written as a csv file:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_df = ammico.get_dataframe(image_dict)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Inspect the dataframe:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_df.head(3)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Or write to a csv file:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Read-in-a-csv-file-containing-text-and-translating/analysing-the-text">
|
||
<h2>Read in a csv file containing text and translating/analysing the text<a class="headerlink" href="#Read-in-a-csv-file-containing-text-and-translating/analysing-the-text" title="Link to this heading"></a></h2>
|
||
<p>Instead of extracting text from an image, or to re-process text that was already extracted, it is also possible to provide a <code class="docutils literal notranslate"><span class="pre">csv</span></code> file containing text in its rows. Provide the path and name of the csv file with the keyword <code class="docutils literal notranslate"><span class="pre">csv_path</span></code>. The keyword <code class="docutils literal notranslate"><span class="pre">column_key</span></code> tells the Analyzer which column key in the csv file holds the text that should be analyzed. This defaults to “text”.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>ta = ammico.TextAnalyzer(csv_path="../data/ref/test.csv", column_key="text")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># read the csv file
|
||
ta.read_csv()
|
||
# set up the dict containing all text entries
|
||
text_dict = ta.mydict
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># set the dump file
|
||
# dump file name
|
||
dump_file = "dump_file.csv"
|
||
# dump every N images
|
||
dump_every = 10
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># analyze the csv file
|
||
for num, key in tqdm(
|
||
enumerate(text_dict.keys()), total=len(text_dict)
|
||
): # loop through all text entries
|
||
ammico.TextDetector(
|
||
text_dict[key], analyse_text=True, skip_extraction=True
|
||
).analyse_image() # analyse text with TextDetector and update dict
|
||
if (
|
||
        num % dump_every == 0 or num == len(text_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(text_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># save the results to a csv file
|
||
text_df = ammico.get_dataframe(text_dict)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># inspect
|
||
text_df.head(3)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># write to csv
|
||
text_df.to_csv("data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="The-detector-modules">
|
||
<h1>The detector modules<a class="headerlink" href="#The-detector-modules" title="Link to this heading"></a></h1>
|
||
<p>The different detector modules with their options are explained in more detail in this section.</p>
<h2>Text detector</h2>
<p>Text on the images can be extracted using the <code class="docutils literal notranslate"><span class="pre">TextDetector</span></code> class (<code class="docutils literal notranslate"><span class="pre">text</span></code> module). The text is initially extracted using the Google Cloud Vision API and then translated into English with googletrans. The translated text is cleaned of whitespace, linebreaks, and numbers using Python syntax and spaCy.</p>
|
||
<p><img alt="fa9661e35a864a6989f950e5186bb570" class="no-scaled-link" src="../_images/text_detector.png" style="width: 800px;" /></p>
|
||
<p>The user can set whether the text should be further summarized, and analyzed for sentiment and named entity recognition, by setting the keyword <code class="docutils literal notranslate"><span class="pre">analyse_text</span></code> to <code class="docutils literal notranslate"><span class="pre">True</span></code> (the default is <code class="docutils literal notranslate"><span class="pre">False</span></code>). If set, the transformers pipeline is used for each of these tasks, with the default models as of 03/2023. Other models can be selected by setting the optional keyword <code class="docutils literal notranslate"><span class="pre">model_names</span></code> to a list of selected models, one for each task:
<code class="docutils literal notranslate"><span class="pre">model_names=["sshleifer/distilbart-cnn-12-6",</span> <span class="pre">"distilbert-base-uncased-finetuned-sst-2-english",</span> <span class="pre">"dbmdz/bert-large-cased-finetuned-conll03-english"]</span></code> for summary, sentiment, and NER. To be even more specific, revision numbers can also be selected by setting the optional keyword <code class="docutils literal notranslate"><span class="pre">revision_numbers</span></code> to a list of revision numbers for each model, for example <code class="docutils literal notranslate"><span class="pre">revision_numbers=["a4f8f3e",</span> <span class="pre">"af0f99b",</span> <span class="pre">"f2482bf"]</span></code>.</p>
|
||
<p>Please note that for the Google Cloud Vision API (the TextDetector class) you need to set a key in order to process the images. This key is ideally set as an environment variable, for example using</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/drive/MyDrive/misinformation-data/misinformation-campaign-981aa55a3b13.json"
|
||
<br/></pre></div>
|
||
</div>
|
||
</div>
|
||
<p>where you place the key on your Google Drive if running on colab, or place it in a local folder on your machine.</p>
|
||
<p>Summarizing, the text detection is carried out using the following method call and keywords, where <code class="docutils literal notranslate"><span class="pre">analyse_text</span></code>, <code class="docutils literal notranslate"><span class="pre">model_names</span></code>, and <code class="docutils literal notranslate"><span class="pre">revision_numbers</span></code> are optional:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for num, key in tqdm(
|
||
enumerate(image_dict.keys()), total=len(image_dict)
|
||
): # loop through all images
|
||
image_dict[key] = ammico.TextDetector(
|
||
image_dict[key], # analyse image with TextDetector and update dict
|
||
analyse_text=True,
|
||
model_names=[
|
||
"sshleifer/distilbart-cnn-12-6",
|
||
"distilbert-base-uncased-finetuned-sst-2-english",
|
||
"dbmdz/bert-large-cased-finetuned-conll03-english",
|
||
],
|
||
revision_numbers=["a4f8f3e", "af0f99b", "f2482bf"],
|
||
).analyse_image()
|
||
|
||
if (
|
||
        num % dump_every == 0 or num == len(image_dict) - 1
|
||
): # save results every dump_every to dump_file
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># write output to csv
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The models can be adapted interactively in the notebook interface and the best models can then be used in a subsequent analysis of the whole data set.</p>
|
||
<p>A detailed description of the output keys and data types is given in the following table.</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>output key</p></th>
|
||
<th class="head"><p>output type</p></th>
|
||
<th class="head"><p>output value</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">text</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the extracted text in the original language</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">text_language</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the detected dominant language of the extracted text</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">text_english</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the text translated into English</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">text_clean</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the text after removal of numbers and unrecognizable words</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">text_summary</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the summary of the text, generated with a transformers model</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">sentiment</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>the detected sentiment, generated with a transformers model</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">sentiment_score</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">float</span></code></p></td>
|
||
<td><p>the confidence associated with the predicted sentiment</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">entity</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected named entities, generated with a transformers model</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">entity_type</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected entity type</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
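<p>Once the nested dictionary is converted to a dataframe (see Step 4), these output keys appear as columns. As a minimal sketch, assuming the analysis was run with <code class="docutils literal notranslate"><span class="pre">analyse_text=True</span></code>, you could inspect a few of them like this:</p>
<div class="highlight-none notranslate"><div class="highlight"><pre><span></span>image_df = ammico.get_dataframe(image_dict)
# inspect selected text-related output columns
image_df[["text_english", "text_summary", "sentiment", "sentiment_score"]].head()
</pre></div>
</div>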
|
||
<section id="Image-summary-and-query">
|
||
<h2>Image summary and query<a class="headerlink" href="#Image-summary-and-query" title="Link to this heading"></a></h2>
|
||
<p>The <code class="docutils literal notranslate"><span class="pre">SummaryDetector</span></code> can be used to generate image captions (<code class="docutils literal notranslate"><span class="pre">summary</span></code>) as well as visual question answering (<code class="docutils literal notranslate"><span class="pre">VQA</span></code>).</p>
|
||
<p><img alt="c1bb5284d8da452db91b3ed56781bca5" class="no-scaled-link" src="../_images/summary_detector.png" style="width: 800px;" /></p>
|
||
<p>This module is based on the <a class="reference external" href="https://github.com/salesforce/LAVIS">LAVIS</a> library. Since the models can be quite large, an initial object is created which will load the necessary models into RAM/VRAM and then use them in the analysis. The user can specify the type of analysis to be performed using the <code class="docutils literal notranslate"><span class="pre">analysis_type</span></code> keyword. Setting it to <code class="docutils literal notranslate"><span class="pre">summary</span></code> will generate a caption (summary), <code class="docutils literal notranslate"><span class="pre">questions</span></code> will prepare answers (VQA) to a list of questions as set by the user,
|
||
<code class="docutils literal notranslate"><span class="pre">summary_and_questions</span></code> will do both. Note that the desired analysis type needs to be set here in the initialization of the detector object, and not when running the analysis for each image; the same holds true for the selected model.</p>
|
||
<p>The implemented models are listed below.</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>input model name</p></th>
|
||
<th class="head"><p>model</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p>base</p></td>
|
||
<td><p>BLIP image captioning base, ViT-B/16, pretrained on COCO dataset</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>large</p></td>
|
||
<td><p>BLIP image captioning large, ViT-L/16, pretrained on COCO dataset</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>vqa</p></td>
|
||
<td><p>BLIP base model fine-tuned on VQA v2.0 dataset</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>blip2_t5_pretrain_flant5xxl</p></td>
|
||
<td><p>BLIP2 pretrained on FlanT5XXL</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>blip2_t5_pretrain_flant5xl</p></td>
|
||
<td><p>BLIP2 pretrained on FlanT5XL</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>blip2_t5_caption_coco_flant5xl</p></td>
|
||
<td><p>BLIP2 pretrained on FlanT5XL, fine-tuned on COCO</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>blip2_opt_pretrain_opt2.7b</p></td>
|
||
<td><p>BLIP2 pretrained on OPT-2.7b</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>blip2_opt_pretrain_opt6.7b</p></td>
|
||
<td><p>BLIP2 pretrained on OPT-6.7b</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>blip2_opt_caption_coco_opt2.7b</p></td>
|
||
<td><p>BLIP2 pretrained on OPT-2.7b, fine-tuned on COCO</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>blip2_opt_caption_coco_opt6.7b</p></td>
|
||
<td><p>BLIP2 pretrained on OPT-6.7b, fine-tuned on COCO</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>Please note that the <code class="docutils literal notranslate"><span class="pre">base</span></code>, <code class="docutils literal notranslate"><span class="pre">large</span></code> and <code class="docutils literal notranslate"><span class="pre">vqa</span></code> models can be run on the default GPU runtime in Google Colab. To run any of the more advanced <code class="docutils literal notranslate"><span class="pre">BLIP2</span></code> models you need more than 20 GB of video memory, so you need to connect a paid A100 GPU in Google Colab.</p>
|
||
<p>First of all, we can run only the summary module by setting <code class="docutils literal notranslate"><span class="pre">analysis_type="summary"</span></code>. You can choose a <code class="docutils literal notranslate"><span class="pre">base</span></code> or a <code class="docutils literal notranslate"><span class="pre">large</span></code> model_type.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_summary_detector = ammico.SummaryDetector(
|
||
image_dict, analysis_type="summary", model_type="base"
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):
|
||
image_dict[key] = image_summary_detector.analyse_image(
|
||
subdict=image_dict[key], analysis_type="summary"
|
||
)
|
||
|
||
    if num % dump_every == 0 or num == len(image_dict) - 1:
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>For VQA, a list of questions needs to be passed when carrying out the analysis; these should be given as a list of strings.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>list_of_questions = [
|
||
"How many persons on the picture?",
|
||
"Are there any politicians in the picture?",
|
||
"Does the picture show something from medicine?",
|
||
]
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>If you want to execute only the VQA module without captioning, just specify the <code class="docutils literal notranslate"><span class="pre">analysis_type</span></code> as <code class="docutils literal notranslate"><span class="pre">questions</span></code> and <code class="docutils literal notranslate"><span class="pre">model_type</span></code> as <code class="docutils literal notranslate"><span class="pre">vqa</span></code>.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_summary_vqa_detector = ammico.SummaryDetector(
|
||
image_dict, analysis_type="questions", model_type="vqa"
|
||
)
|
||
|
||
for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):
|
||
image_dict[key] = image_summary_vqa_detector.analyse_image(
|
||
subdict=image_dict[key],
|
||
analysis_type="questions",
|
||
list_of_questions=list_of_questions,
|
||
)
|
||
    if num % dump_every == 0 or num == len(image_dict) - 1:
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Or you can specify the analysis type as <code class="docutils literal notranslate"><span class="pre">summary_and_questions</span></code>, then both caption creation and question answers will be generated for each image. In this case, you can choose a <code class="docutils literal notranslate"><span class="pre">base</span></code> or a <code class="docutils literal notranslate"><span class="pre">large</span></code> model_type.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_summary_vqa_detector = ammico.SummaryDetector(
|
||
image_dict, analysis_type="summary_and_questions", model_type="base"
|
||
)
|
||
for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):
|
||
image_dict[key] = image_summary_vqa_detector.analyse_image(
|
||
subdict=image_dict[key],
|
||
analysis_type="summary_and_questions",
|
||
list_of_questions=list_of_questions,
|
||
)
|
||
    if num % dump_every == 0 or num == len(image_dict) - 1:
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv(dump_file)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The output is given as a dictionary with the following keys and data types:</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>output key</p></th>
|
||
<th class="head"><p>output type</p></th>
|
||
<th class="head"><p>output value</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">const_image_summary</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>when <code class="docutils literal notranslate"><span class="pre">analysis_type="summary"</span></code> or <code class="docutils literal notranslate"><span class="pre">"summary_and_questions"</span></code>, constant image caption (does not change upon re-running the analysis for the same model)</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">3_non-deterministic_summary</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>when <code class="docutils literal notranslate"><span class="pre">analysis_type="summary"</span></code> or <code class="docutils literal notranslate"><span class="pre">summary_and_questions</span></code>, three different captions generated with different random seeds</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><em>a user-defined input question</em></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>when <code class="docutils literal notranslate"><span class="pre">analysis_type="questions"</span></code> or <code class="docutils literal notranslate"><span class="pre">summary_and_questions</span></code>, the answer to the user-defined input question</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
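<p>For a quick check of these keys, you can inspect the nested dictionary directly. The cell below is a minimal sketch; the question string used as a key is only an example and has to match one of the entries in your <code class="docutils literal notranslate"><span class="pre">list_of_questions</span></code>.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># minimal sketch: inspect the summary output for the first image in the dictionary
example_key = list(image_dict.keys())[0]
print(image_dict[example_key].get("const_image_summary"))
print(image_dict[example_key].get("3_non-deterministic_summary"))
# answers are stored under the question string itself (example question assumed here)
print(image_dict[example_key].get("Are there people in the image?"))
</pre></div>
</div>
</div>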
|
||
<section id="BLIP2-models">
|
||
<h3>BLIP2 models<a class="headerlink" href="#BLIP2-models" title="Link to this heading"></a></h3>
|
||
<p>The BLIP2 models are computationally very demanding and require approximately 60 GB of RAM; they can easily use more than 20 GB of GPU memory.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>obj = ammico.SummaryDetector(
|
||
subdict=image_dict,
|
||
analysis_type="summary_and_questions",
|
||
model_type="blip2_t5_caption_coco_flant5xl",
|
||
)
|
||
# list of the new models that can be used:
|
||
# "blip2_t5_pretrain_flant5xxl",
|
||
# "blip2_t5_pretrain_flant5xl",
|
||
# "blip2_t5_caption_coco_flant5xl",
|
||
# "blip2_opt_pretrain_opt2.7b",
|
||
# "blip2_opt_pretrain_opt6.7b",
|
||
# "blip2_opt_caption_coco_opt2.7b",
|
||
# "blip2_opt_caption_coco_opt6.7b",
|
||
|
||
# You can use the `pretrain_` model types for zero-shot image-to-text generation with prompts.
# Or you can use the `caption_coco_` model types to generate COCO-style captions.
# `flant5` and `opt` mean that the model is equipped with a FlanT5 or an OPT LLM, respectively.

# You can also run all calculations on CPU by setting device_type="cpu", or on GPU by setting device_type="cuda".
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>You can also pass a list of questions to this cell if <code class="docutils literal notranslate"><span class="pre">analysis_type="summary_and_questions"</span></code> or <code class="docutils literal notranslate"><span class="pre">analysis_type="questions"</span></code>. Note that the question format has changed for these newer models.</p>
|
||
<p>Here is an example of a list of questions:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>list_of_questions = [
|
||
"Question: Are there people in the image? Answer:",
|
||
"Question: What is this picture about? Answer:",
|
||
]
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for key in image_dict:
|
||
image_dict[key] = obj.analyse_image(
|
||
subdict=image_dict[key],
|
||
analysis_type="questions",
|
||
list_of_questions=list_of_questions,
|
||
)
|
||
|
||
# analysis_type can be
|
||
# "summary",
|
||
# "questions",
|
||
# "summary_and_questions".
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>You can also include previous questions and answers as context in the prompt, and pass in questions like the ones below to get a more accurate answer.</p>
<p>You can combine as many questions as you want in a single query, passed as a list.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>list_of_questions = [
|
||
"Question: What country is in the picture? Answer: USA. Question: Why? Answer: Because there is an American flag in the background . Question: Where it comes from? Answer:",
|
||
"Question: Which city is this? Answer: Frankfurt. Question: Why?",
|
||
]
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for key in image_dict:
|
||
image_dict[key] = obj.analyse_image(
|
||
subdict=image_dict[key],
|
||
analysis_type="questions",
|
||
list_of_questions=list_of_questions,
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_dict
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>You can also ask sequential questions if you pass the argument <code class="docutils literal notranslate"><span class="pre">consequential_questions=True</span></code>. This means that the answers to previous questions are passed as context to the next question. However, this method is somewhat slower, because for each image the answers are computed sequentially rather than simultaneously.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>list_of_questions = [
|
||
"Question: Is this picture taken inside or outside? Answer:",
|
||
"Question: Why? Answer:",
|
||
]
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for key in image_dict:
|
||
image_dict[key] = obj.analyse_image(
|
||
subdict=image_dict[key],
|
||
analysis_type="questions",
|
||
list_of_questions=list_of_questions,
|
||
consequential_questions=True,
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_dict
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># write output to csv
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="Detection-of-faces-and-facial-expression-analysis">
|
||
<h2>Detection of faces and facial expression analysis<a class="headerlink" href="#Detection-of-faces-and-facial-expression-analysis" title="Link to this heading"></a></h2>
|
||
<p>Faces and facial expressions are detected and analyzed using the <code class="docutils literal notranslate"><span class="pre">EmotionDetector</span></code> class from the <code class="docutils literal notranslate"><span class="pre">faces</span></code> module. First, RetinaFace detects whether faces are present in the image; then it is determined whether face masks are worn (Face-Mask-Detection). The probabilistic detection of age, gender, race, and emotions is carried out with deepface, but only if the disclosure statement has been accepted (see above).</p>
|
||
<p><img alt="1d72c51aea934efea31bf149f207463f" class="no-scaled-link" src="../_images/emotion_detector.png" style="width: 800px;" /></p>
|
||
<p>Depending on the features found in the image, the face detection module returns different analysis content: If no faces are found in the image, all further steps are skipped and the result <code class="docutils literal notranslate"><span class="pre">"face":</span> <span class="pre">"No",</span> <span class="pre">"multiple_faces":</span> <span class="pre">"No",</span> <span class="pre">"no_faces":</span> <span class="pre">0,</span> <span class="pre">"wears_mask":</span> <span class="pre">["No"],</span> <span class="pre">"age":</span> <span class="pre">[None],</span> <span class="pre">"gender":</span> <span class="pre">[None],</span> <span class="pre">"race":</span> <span class="pre">[None],</span> <span class="pre">"emotion":</span> <span class="pre">[None],</span> <span class="pre">"emotion</span> <span class="pre">(category)":</span> <span class="pre">[None]</span></code> is returned. If one or several faces are found, up to three faces are analyzed, and each is first checked for whether it is partially concealed by a face mask. If
|
||
yes, only age and gender are detected; if no, also race, emotion, and dominant emotion are detected. In case of the latter, the output could look like this: <code class="docutils literal notranslate"><span class="pre">"face":</span> <span class="pre">"Yes",</span> <span class="pre">"multiple_faces":</span> <span class="pre">"Yes",</span> <span class="pre">"no_faces":</span> <span class="pre">2,</span> <span class="pre">"wears_mask":</span> <span class="pre">["No",</span> <span class="pre">"No"],</span> <span class="pre">"age":</span> <span class="pre">[27,</span> <span class="pre">28],</span> <span class="pre">"gender":</span> <span class="pre">["Man",</span> <span class="pre">"Man"],</span> <span class="pre">"race":</span> <span class="pre">["asian",</span> <span class="pre">None],</span> <span class="pre">"emotion":</span> <span class="pre">["angry",</span> <span class="pre">"neutral"],</span> <span class="pre">"emotion</span> <span class="pre">(category)":</span> <span class="pre">["Negative",</span> <span class="pre">"Neutral"]</span></code>, where for the two faces that are detected (given by <code class="docutils literal notranslate"><span class="pre">no_faces</span></code>), some of the values are returned as a list
|
||
with the first item for the first (largest) face and the second item for the second (smaller) face (for example, <code class="docutils literal notranslate"><span class="pre">"emotion"</span></code> returns a list <code class="docutils literal notranslate"><span class="pre">["angry",</span> <span class="pre">"neutral"]</span></code> signifying the first face expressing anger, and the second face having a neutral expression).</p>
|
||
<p>The emotion detection reports the seven facial expressions angry, fear, neutral, sad, disgust, happy and surprise. These emotions are assigned based on the returned confidence of the model (between 0 and 1), with a high confidence signifying a high likelihood of the detected emotion being correct. Emotion recognition is not an easy task, even for a human; therefore, we have added a keyword <code class="docutils literal notranslate"><span class="pre">emotion_threshold</span></code> signifying the % value above which an emotion is counted as being detected. The
|
||
default is set to 50%, so that a confidence above 0.5 results in an emotion being assigned. If the confidence is lower, no emotion is assigned.</p>
|
||
<p>From the seven facial expressions, an overall dominating emotion category is identified: negative, positive, or neutral emotion. These are defined with the facial expressions angry, disgust, fear and sad for the negative category, happy for the positive category, and surprise and neutral for the neutral category.</p>
|
||
<p>A similar threshold as for the emotion recognition is set for the race/ethnicity and gender detection, <code class="docutils literal notranslate"><span class="pre">race_threshold</span></code> and <code class="docutils literal notranslate"><span class="pre">gender_threshold</span></code>, with the default set to 50%, so that only a confidence for race/gender above 0.5 will return a value in the analysis.</p>
|
||
<p>For age, unfortunately, no confidence value is accessible, so no threshold can be set for this type of analysis. The <a class="reference external" href="https://sefiks.com/2019/02/13/apparent-age-and-gender-prediction-in-keras/">reported MAE of the model is ± 4.65</a> years.</p>
|
||
<p>You may also pass the name of the environment variable that determines if you accept or reject the ethical disclosure statement. By default, the variable is named <code class="docutils literal notranslate"><span class="pre">DISCLOSURE_AMMICO</span></code>.</p>
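<p>If you prefer to set this variable programmatically rather than answering the interactive prompt, a minimal sketch could look like the cell below; note that the accepted value <code class="docutils literal notranslate"><span class="pre">"True"</span></code> and the effect of removing the variable are assumptions here, not guaranteed behaviour.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>import os

# minimal sketch: accept the ethical disclosure statement via the environment variable
# (the value "True" is an assumption about what the detector expects)
os.environ["DISCLOSURE_AMMICO"] = "True"

# to be asked again on the next run, remove the variable
# os.environ.pop("DISCLOSURE_AMMICO", None)
</pre></div>
</div>
</div>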
|
||
<p>Summarizing, the face detection is carried out using the following method call and keywords, where <code class="docutils literal notranslate"><span class="pre">emotion_threshold</span></code>, <code class="docutils literal notranslate"><span class="pre">race_threshold</span></code>, <code class="docutils literal notranslate"><span class="pre">gender_threshold</span></code>, <code class="docutils literal notranslate"><span class="pre">accept_disclosure</span></code> are optional:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for key in image_dict.keys():
|
||
image_dict[key] = ammico.EmotionDetector(
|
||
image_dict[key],
|
||
emotion_threshold=50,
|
||
race_threshold=50,
|
||
gender_threshold=50,
|
||
accept_disclosure="DISCLOSURE_AMMICO",
|
||
).analyse_image()
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># write output to csv
|
||
image_df = ammico.get_dataframe(image_dict)
|
||
image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The thresholds can be adapted interactively in the notebook interface and the optimal value can then be used in a subsequent analysis of the whole data set.</p>
|
||
<p>The output keys that are generated are:</p>
|
||
<table class="docutils align-default">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>output key</p></th>
|
||
<th class="head"><p>output type</p></th>
|
||
<th class="head"><p>output value</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">face</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>if a face is detected</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">multiple_faces</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">str</span></code></p></td>
|
||
<td><p>if multiple faces are detected</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">no_faces</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">int</span></code></p></td>
|
||
<td><p>the number of detected faces</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">wears_mask</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>if each of the detected faces wears a face covering, up to three faces</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">age</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[int]</span></code></p></td>
|
||
<td><p>the detected age, up to three faces</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">gender</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected gender, up to three faces</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">race</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected race, up to three faces, if above the confidence threshold</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">emotion</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected emotion, up to three faces, if above the confidence threshold</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">emotion</span> <span class="pre">(category)</span></code></p></td>
|
||
<td><p><code class="docutils literal notranslate"><span class="pre">list[str]</span></code></p></td>
|
||
<td><p>the detected emotion category (positive, negative, or neutral), up to three faces, if above the confidence threshold</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
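<p>Assuming the columns of <code class="docutils literal notranslate"><span class="pre">image_df</span></code> created above carry the same names as these output keys, a minimal sketch for a quick sanity check could look like this:</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># minimal sketch: quick sanity checks on the face detection results
# (column names are assumed to match the output keys listed above)
print(image_df["face"].value_counts())  # how many images contain at least one face
print(image_df["no_faces"].sum())  # total number of detected faces
</pre></div>
</div>
</div>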
|
||
</section>
|
||
<section id="Image-Multimodal-Search">
|
||
<h2>Image Multimodal Search<a class="headerlink" href="#Image-Multimodal-Search" title="Link to this heading"></a></h2>
|
||
<p>This module shows how to carry out an image multimodal search with the <a class="reference external" href="https://github.com/salesforce/LAVIS">LAVIS</a> library.</p>
|
||
<section id="Indexing-and-extracting-features-from-images-in-selected-folder">
|
||
<h3>Indexing and extracting features from images in selected folder<a class="headerlink" href="#Indexing-and-extracting-features-from-images-in-selected-folder" title="Link to this heading"></a></h3>
|
||
<p>First you need to select a model. You can choose one of the following models:</p>
|
||
<ul class="simple">
|
||
<li><p><a class="reference external" href="https://github.com/salesforce/BLIP">blip</a></p></li>
|
||
<li><p><a class="reference external" href="https://huggingface.co/docs/transformers/main/model_doc/blip-2">blip2</a></p></li>
|
||
<li><p><a class="reference external" href="https://github.com/salesforce/ALBEF">albef</a></p></li>
|
||
<li><p><a class="reference external" href="https://github.com/openai/CLIP/blob/main/model-card.md">clip_base</a></p></li>
|
||
<li><p><a class="reference external" href="https://github.com/mlfoundations/open_clip">clip_vitl14</a></p></li>
|
||
<li><p><a class="reference external" href="https://github.com/mlfoundations/open_clip">clip_vitl14_336</a></p></li>
|
||
</ul>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>model_type = "blip"
|
||
# model_type = "blip2"
|
||
# model_type = "albef"
|
||
# model_type = "clip_base"
|
||
# model_type = "clip_vitl14"
|
||
# model_type = "clip_vitl14_336"
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>To process the loaded images using the selected model, use the code below:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>my_obj = ammico.MultimodalSearch(image_dict)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>(
|
||
model,
|
||
vis_processors,
|
||
txt_processors,
|
||
image_keys,
|
||
image_names,
|
||
features_image_stacked,
|
||
) = my_obj.parsing_images(
|
||
model_type,
|
||
path_to_save_tensors="/content/drive/MyDrive/misinformation-data/",
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors by giving a path with the keyword <code class="docutils literal notranslate"><span class="pre">path_to_save_tensors</span></code>, a file with the filename <code class="docutils literal notranslate"><span class="pre"><Number_of_images>_<model_name>_saved_features_image.pt</span></code> will be placed there.</p>
|
||
<p>This can save you time if you want to analyse the same images with the same model but different questions. To run using the saved tensors, execute the code below, giving the path and name of the tensor file. Any subsequent query of the model will then run in a fraction of the time it took initially.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># uncomment the code below if you want to load the tensors from the drive
|
||
# and just want to ask different questions for the same set of images
|
||
# (
|
||
# model,
|
||
# vis_processors,
|
||
# txt_processors,
|
||
# image_keys,
|
||
# image_names,
|
||
# features_image_stacked,
|
||
# ) = my_obj.parsing_images(
|
||
# model_type,
|
||
# path_to_load_tensors="/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt",
|
||
# )
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Here we have already processed our image folder of 5 images with the <code class="docutils literal notranslate"><span class="pre">clip_base</span></code> model, so you just need to pass the name <code class="docutils literal notranslate"><span class="pre">5_clip_base_saved_features_image.pt</span></code> of the saved file containing the tensors of all images as the keyword argument <code class="docutils literal notranslate"><span class="pre">path_to_load_tensors</span></code>.</p>
|
||
</section>
|
||
<section id="Formulate-your-search-queries">
|
||
<h3>Formulate your search queries<a class="headerlink" href="#Formulate-your-search-queries" title="Link to this heading"></a></h3>
|
||
<p>Next, you need to formulate your search queries. You can search either by image or by text, and you can search for a single query or for several queries at once; the computational time should not differ much. The format of the queries is as follows:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>import importlib_resources # only require for image query example
|
||
|
||
image_example_query = str(
|
||
importlib_resources.files("ammico") / "data" / "test-crop-image.png"
|
||
) # creating the path to the image for the image query example
|
||
|
||
search_query = [
|
||
{"text_input": "politician press conference"},
|
||
{"text_input": "a world map"},
|
||
{"text_input": "a dog"}, # This is how looks text query
|
||
{
|
||
"image": image_example_query
|
||
    },  # This is how an image query looks; here `image_example_query` is the path to the query image, e.g. "data/test-crop-image.png"
|
||
]
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>You can filter your results in 3 different ways:</p>
|
||
<ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">filter_number_of_images</span></code> limits the number of images found. That is, if the parameter <code class="docutils literal notranslate"><span class="pre">filter_number_of_images</span> <span class="pre">=</span> <span class="pre">10</span></code>, then the 10 images that best match the query will be shown. The other images' ranks will be set to <code class="docutils literal notranslate"><span class="pre">None</span></code> and their similarity values to <code class="docutils literal notranslate"><span class="pre">0</span></code>.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">filter_val_limit</span></code> discards images whose similarity value is smaller than <code class="docutils literal notranslate"><span class="pre">filter_val_limit</span></code>. That is, if the parameter <code class="docutils literal notranslate"><span class="pre">filter_val_limit</span> <span class="pre">=</span> <span class="pre">0.2</span></code>, all images with a similarity of less than 0.2 will be discarded.</p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">filter_rel_error</span></code> (given in percent) discards images whose similarity value deviates too much from the best match in the current search: an image is kept only if <code class="docutils literal notranslate"><span class="pre">100</span> <span class="pre">*</span> <span class="pre">abs(current_similarity_value</span> <span class="pre">-</span> <span class="pre">best_similarity_value_in_current_search)/best_similarity_value_in_current_search</span> <span class="pre"><</span> <span class="pre">filter_rel_error</span></code>. That is, if we set filter_rel_error = 30 and the top-1 image has a similarity value of 0.5, all images with a similarity of less than 0.35 are discarded.</p></li>
|
||
</ul>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>similarity, sorted_lists = my_obj.multimodal_search(
|
||
model,
|
||
vis_processors,
|
||
txt_processors,
|
||
model_type,
|
||
image_keys,
|
||
features_image_stacked,
|
||
search_query,
|
||
filter_number_of_images=20,
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
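<p>For illustration, the cell below sketches a query that combines all three filter keywords; treat the exact keyword names and values as assumptions based on the description above.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># hedged sketch: combine the three filter options in a single search
similarity, sorted_lists = my_obj.multimodal_search(
    model,
    vis_processors,
    txt_processors,
    model_type,
    image_keys,
    features_image_stacked,
    search_query,
    filter_number_of_images=10,  # keep only the 10 best matches
    filter_val_limit=0.2,  # discard images with similarity below 0.2
    filter_rel_error=30,  # discard images more than 30% below the best match
)
</pre></div>
</div>
</div>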
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>similarity
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>sorted_lists
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>After running the <code class="docutils literal notranslate"><span class="pre">multimodal_search</span></code> function, the results of each query are added to the source dictionary.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>image_dict
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>A special function was written to present the search results conveniently.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>my_obj.show_results(
|
||
search_query[0], # you can change the index to see the results for other queries
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>my_obj.show_results(
|
||
search_query[3], # you can change the index to see the results for other queries
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Improve-the-search-results">
|
||
<h3>Improve the search results<a class="headerlink" href="#Improve-the-search-results" title="Link to this heading"></a></h3>
|
||
<p>For even better results, a slightly different, more resource-intensive approach can be used to refine the search results; it is therefore applied only after the main algorithm has found the most relevant images. This approach works only with text queries and skips image queries. You can choose between three models: <code class="docutils literal notranslate"><span class="pre">"blip_base"</span></code>, <code class="docutils literal notranslate"><span class="pre">"blip_large"</span></code> and <code class="docutils literal notranslate"><span class="pre">"blip2_coco"</span></code>. If you get an <code class="docutils literal notranslate"><span class="pre">Out</span> <span class="pre">of</span> <span class="pre">Memory</span></code> error, try reducing the <code class="docutils literal notranslate"><span class="pre">batch_size</span></code> value (minimum 1), which is the number of images processed simultaneously. With the parameter <code class="docutils literal notranslate"><span class="pre">need_grad_cam=True/False</span></code> you can enable the calculation of a heat map for each processed image; the heat maps are saved in <code class="docutils literal notranslate"><span class="pre">image_gradcam_with_itm</span></code>. The <code class="docutils literal notranslate"><span class="pre">image_text_match_reordering()</span></code> function then calculates new similarity values and new ranks for each image, and the resulting values are added to the general dictionary.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>itm_model = "blip_base"
|
||
# itm_model = "blip_large"
|
||
# itm_model = "blip2_coco"
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(
|
||
search_query,
|
||
itm_model,
|
||
image_keys,
|
||
sorted_lists,
|
||
batch_size=1,
|
||
need_grad_cam=True,
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Then, using the same output function, you can add the <code class="docutils literal notranslate"><span class="pre">itm=True</span></code> argument to output the new image order. Remember that for image queries, an error will be thrown with the <code class="docutils literal notranslate"><span class="pre">itm=True</span></code> argument. You can also pass <code class="docutils literal notranslate"><span class="pre">image_gradcam_with_itm</span></code> along with the <code class="docutils literal notranslate"><span class="pre">itm=True</span></code> argument to output the heat maps of the calculated images.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>my_obj.show_results(
|
||
search_query[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm
|
||
)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Save-search-results-to-csv">
|
||
<h3>Save search results to csv<a class="headerlink" href="#Save-search-results-to-csv" title="Link to this heading"></a></h3>
|
||
<p>Convert the dictionary of dictionaries into a dictionary with lists:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>outdict = ammico.append_data_to_dict(image_dict)
|
||
df = ammico.dump_df(outdict)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Check the dataframe:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>df.head(10)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Write the csv file:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="Color-analysis-of-pictures">
|
||
<h2>Color analysis of pictures<a class="headerlink" href="#Color-analysis-of-pictures" title="Link to this heading"></a></h2>
|
||
<p>This module performs a primary color analysis of images using the K-Means algorithm. The output is a set of N primary colors and their corresponding percentages.</p>
|
||
<p>To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below. Here, we display the color detection results provided by the <code class="docutils literal notranslate"><span class="pre">colorgram</span></code> and <code class="docutils literal notranslate"><span class="pre">colour</span></code> libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the <code class="docutils literal notranslate"><span class="pre">port</span></code> number if you are already running several notebook instances on the same server.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>analysis_explorer = ammico.AnalysisExplorer(image_dict)
|
||
analysis_explorer.run_server(port=8057)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for key in image_dict.keys():
|
||
image_dict[key] = ammico.colors.ColorDetector(image_dict[key]).analyse_image()
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>These steps are required to convert the dictionary of dictionaries into a dictionary with lists, which can then be converted into a pandas dataframe and exported to a csv file.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>df = ammico.get_dataframe(image_dict)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Check the dataframe:</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>df.head(10)
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
<p>Write the csv file - here you should provide a file path and file name for the csv file to be written.</p>
|
||
<div class="nbinput nblast docutils container">
|
||
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
||
</pre></div>
|
||
</div>
|
||
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
|
||
</pre></div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="Further-detector-modules">
|
||
<h2>Further detector modules<a class="headerlink" href="#Further-detector-modules" title="Link to this heading"></a></h2>
|
||
<p>Further detector modules exist; for example, it is possible to carry out a topic analysis of the text data, as well as to crop social media posts automatically. These are more experimental features and have their own demonstration notebooks.</p>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||
<a href="../create_API_key_link.html" class="btn btn-neutral float-left" title="Instructions how to generate and enable a google Cloud Vision API key" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||
<a href="../modules.html" class="btn btn-neutral float-right" title="AMMICO package modules" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||
</div>
|
||
|
||
<hr/>
|
||
|
||
<div role="contentinfo">
|
||
<p>© Copyright 2022, Scientific Software Center, Heidelberg University.</p>
|
||
</div>
|
||
|
||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||
|
||
|
||
</footer>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<script>
|
||
jQuery(function () {
|
||
SphinxRtdTheme.Navigation.enable(true);
|
||
});
|
||
</script>
|
||
|
||
</body>
|
||
</html> |