зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-30 21:46:04 +02:00
228 строки
18 KiB
HTML
228 строки
18 KiB
HTML
<!DOCTYPE html>
|
|
<html class="writer-html5" lang="en" data-content_root="../">
|
|
<head>
|
|
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>Crop posts module — AMMICO 0.0.1 documentation</title>
|
|
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=80d5e7a1" />
|
|
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=19f00094" />
|
|
<link rel="stylesheet" type="text/css" href="../_static/nbsphinx-code-cells.css" />
|
|
|
|
|
|
<!--[if lt IE 9]>
|
|
<script src="../_static/js/html5shiv.min.js"></script>
|
|
<![endif]-->
|
|
|
|
<script src="../_static/jquery.js?v=5d32c60e"></script>
|
|
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
<script src="../_static/documentation_options.js?v=d45e8c67"></script>
|
|
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
|
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
|
|
<script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
|
|
<script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|
<script src="../_static/js/theme.js"></script>
|
|
<link rel="index" title="Index" href="../genindex.html" />
|
|
<link rel="search" title="Search" href="../search.html" />
|
|
<link rel="next" title="AMMICO package modules" href="../modules.html" />
|
|
<link rel="prev" title="AMMICO Demonstration Notebook" href="DemoNotebook_ammico.html" />
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav">
|
|
<div class="wy-grid-for-nav">
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
<div class="wy-side-scroll">
|
|
<div class="wy-side-nav-search" >
|
|
|
|
|
|
|
|
<a href="../index.html" class="icon icon-home">
|
|
AMMICO
|
|
</a>
|
|
<div role="search">
|
|
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
<input type="hidden" name="check_keywords" value="yes" />
|
|
<input type="hidden" name="area" value="default" />
|
|
</form>
|
|
</div>
|
|
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
|
<ul class="current">
|
|
<li class="toctree-l1"><a class="reference internal" href="../readme_link.html">AMMICO - AI Media and Misinformation Content Analysis Tool</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../readme_link.html#faq">FAQ</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../create_API_key_link.html">Instructions how to generate and enable a google Cloud Vision API key</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="DemoNotebook_ammico.html">AMMICO Demonstration Notebook</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="DemoNotebook_ammico.html#Step-0:-Create-and-set-a-Google-Cloud-Vision-Key">Step 0: Create and set a Google Cloud Vision Key</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="DemoNotebook_ammico.html#Step-1:-Read-your-data-into-AMMICO">Step 1: Read your data into AMMICO</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="DemoNotebook_ammico.html#The-detector-modules">The detector modules</a></li>
|
|
<li class="toctree-l1 current"><a class="current reference internal" href="#">Crop posts module</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../modules.html">AMMICO package modules</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../license_link.html">License</a></li>
|
|
</ul>
|
|
|
|
</div>
|
|
</div>
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../index.html">AMMICO</a>
|
|
</nav>
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="Page navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
<li class="breadcrumb-item active">Crop posts module</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
<a href="https://github.com/ssciwr/AMMICO/blob/main/source/notebooks/Example%20cropposts.ipynb" class="fa fa-github"> Edit on GitHub</a>
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
<div itemprop="articleBody">
|
|
|
|
<section id="Crop-posts-module">
|
|
<h1>Crop posts module<a class="headerlink" href="#Crop-posts-module" title="Link to this heading"></a></h1>
|
|
<p>Crop posts from images of social media posts in order to keep the important text information. Manually cropped views of social media posts can be set as references for cropping other images of the same type of social media post.</p>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># Please ignore this cell: extra install steps that are only executed when running the notebook on Google Colab</span>
|
|
<span class="c1"># flake8-noqa-cell</span>
|
|
<span class="kn">import</span> <span class="nn">os</span>
|
|
<span class="k">if</span> <span class="s1">'google.colab'</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">get_ipython</span><span class="p">()):</span>
|
|
<span class="c1"># we're running on colab</span>
|
|
<span class="c1"># first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)</span>
|
|
<span class="o">%</span><span class="k">pip</span> install setuptools==61 -qqq
|
|
<span class="c1"># install the AMMICO package</span>
|
|
<span class="o">%</span><span class="k">pip</span> install git+https://github.com/ssciwr/AMMICO.git -qqq
|
|
|
|
<span class="c1"># prevent loading of the wrong opencv library</span>
|
|
<span class="o">%</span><span class="k">pip</span> uninstall -y opencv-contrib-python
|
|
<span class="o">%</span><span class="k">pip</span> install opencv-contrib-python
|
|
|
|
<span class="kn">from</span> <span class="nn">google.colab</span> <span class="kn">import</span> <span class="n">drive</span>
|
|
<span class="n">drive</span><span class="o">.</span><span class="n">mount</span><span class="p">(</span><span class="s1">'/content/drive'</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isdir</span><span class="p">(</span><span class="s1">'/content/ref'</span><span class="p">):</span>
|
|
<span class="o">!</span>wget<span class="w"> </span>https://github.com/ssciwr/AMMICO/archive/refs/heads/ref-data.zip<span class="w"> </span>-q
|
|
<span class="o">!</span>unzip<span class="w"> </span>-qq<span class="w"> </span>ref-data.zip<span class="w"> </span>-d<span class="w"> </span>.<span class="w"> </span><span class="o">&&</span><span class="w"> </span>mv<span class="w"> </span>-f<span class="w"> </span>AMMICO-ref-data/data/ref<span class="w"> </span>.<span class="w"> </span><span class="o">&&</span><span class="w"> </span>rm<span class="w"> </span>-rf<span class="w"> </span>AMMICO-ref-data<span class="w"> </span>ref-data.zip
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">ammico.cropposts</span> <span class="k">as</span> <span class="nn">crpo</span>
|
|
<span class="kn">import</span> <span class="nn">ammico.utils</span> <span class="k">as</span> <span class="nn">utils</span>
|
|
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
|
<span class="kn">import</span> <span class="nn">cv2</span>
|
|
<span class="kn">import</span> <span class="nn">importlib_resources</span>
|
|
<span class="n">pkg</span> <span class="o">=</span> <span class="n">importlib_resources</span><span class="o">.</span><span class="n">files</span><span class="p">(</span><span class="s2">"ammico"</span><span class="p">)</span>
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<p>The cropping is carried out by finding reference images on the image to be cropped. If a reference matches a region on the image, then everything below the matched region is removed. Manually look at a reference and an example post with the code below.</p>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># load the reference view used for cropping social media post images of the same type.</span>
|
|
<span class="c1"># substitute the below paths for your samples</span>
|
|
<span class="n">path_ref</span> <span class="o">=</span> <span class="n">pkg</span> <span class="o">/</span> <span class="s2">"data"</span> <span class="o">/</span> <span class="s2">"ref"</span> <span class="o">/</span> <span class="s2">"ref-00.png"</span>
|
|
<span class="n">ref_view</span> <span class="o">=</span> <span class="n">cv2</span><span class="o">.</span><span class="n">imread</span><span class="p">(</span><span class="n">path_ref</span><span class="o">.</span><span class="n">as_posix</span><span class="p">())</span>
|
|
<span class="n">RGB_ref_view</span> <span class="o">=</span> <span class="n">cv2</span><span class="o">.</span><span class="n">cvtColor</span><span class="p">(</span><span class="n">ref_view</span><span class="p">,</span> <span class="n">cv2</span><span class="o">.</span><span class="n">COLOR_BGR2RGB</span><span class="p">)</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">15</span><span class="p">))</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">RGB_ref_view</span><span class="p">)</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
|
|
|
<span class="n">path_post</span> <span class="o">=</span> <span class="n">pkg</span> <span class="o">/</span> <span class="s2">"data"</span> <span class="o">/</span> <span class="s2">"test-crop-image.png"</span>
|
|
<span class="n">view</span> <span class="o">=</span> <span class="n">cv2</span><span class="o">.</span><span class="n">imread</span><span class="p">(</span><span class="n">path_post</span><span class="o">.</span><span class="n">as_posix</span><span class="p">())</span>
|
|
<span class="n">RGB_view</span> <span class="o">=</span> <span class="n">cv2</span><span class="o">.</span><span class="n">cvtColor</span><span class="p">(</span><span class="n">view</span><span class="p">,</span> <span class="n">cv2</span><span class="o">.</span><span class="n">COLOR_BGR2RGB</span><span class="p">)</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">15</span><span class="p">))</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">RGB_view</span><span class="p">)</span>
|
|
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<p>You can now crop the image and check along the way that everything looks fine. <code class="docutils literal notranslate"><span class="pre">plt_match</span></code> will plot the matches on the image and the line below which content will be cropped; <code class="docutils literal notranslate"><span class="pre">plt_crop</span></code> will plot the cropped text part of the social media post with the comments removed; <code class="docutils literal notranslate"><span class="pre">plt_image</span></code> will plot the image part of the social media post if applicable.</p>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="c1"># crop a post using the reference view and check the cropping</span>
|
|
<span class="c1"># this will only plot something if the reference is found on the image</span>
|
|
<span class="n">crop_view</span> <span class="o">=</span> <span class="n">crpo</span><span class="o">.</span><span class="n">crop_posts_from_refs</span><span class="p">(</span>
|
|
<span class="p">[</span><span class="n">ref_view</span><span class="p">],</span> <span class="n">view</span><span class="p">,</span>
|
|
<span class="n">plt_match</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">plt_crop</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">plt_image</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<p>Batch crop images from the image folder given in <code class="docutils literal notranslate"><span class="pre">crop_dir</span></code>. The cropped images will be saved in the <code class="docutils literal notranslate"><span class="pre">save_crop_dir</span></code> folder with the same file names as the original files. The reference images with the items to match are provided in <code class="docutils literal notranslate"><span class="pre">ref_dir</span></code>.</p>
|
|
<p>Sometimes the cropping will be imperfect, due to improper matches on the image. It is sometimes easier to first categorize the social media posts and then set different references in the reference folder <code class="docutils literal notranslate"><span class="pre">ref_dir</span></code>.</p>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><br/><span></span><span class="n">crop_dir</span> <span class="o">=</span> <span class="s2">"data/"</span>
|
|
<span class="n">ref_dir</span> <span class="o">=</span> <span class="n">pkg</span> <span class="o">/</span> <span class="s2">"data"</span> <span class="o">/</span> <span class="s2">"ref"</span>
|
|
<span class="n">save_crop_dir</span> <span class="o">=</span> <span class="s2">"data/crop/"</span>
|
|
|
|
<span class="n">files</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">find_files</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="n">crop_dir</span><span class="p">,</span><span class="n">limit</span><span class="o">=</span><span class="mi">10</span><span class="p">,)</span>
|
|
<span class="n">ref_files</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">find_files</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="n">ref_dir</span><span class="o">.</span><span class="n">as_posix</span><span class="p">(),</span> <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">)</span>
|
|
|
|
<span class="n">crpo</span><span class="o">.</span><span class="n">crop_media_posts</span><span class="p">(</span><span class="n">files</span><span class="p">,</span> <span class="n">ref_files</span><span class="p">,</span> <span class="n">save_crop_dir</span><span class="p">,</span> <span class="n">plt_match</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">plt_crop</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">plt_image</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
<span class="nb">print</span><span class="p">(</span><span class="s2">"Batch cropping images done"</span><span class="p">)</span>
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
<div class="nbinput nblast docutils container">
|
|
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
|
|
</pre></div>
|
|
</div>
|
|
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>
|
|
</pre></div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
|
<a href="DemoNotebook_ammico.html" class="btn btn-neutral float-left" title="AMMICO Demonstration Notebook" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
|
<a href="../modules.html" class="btn btn-neutral float-right" title="AMMICO package modules" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
|
</div>
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<p>© Copyright 2022, Scientific Software Center, Heidelberg University.</p>
|
|
</div>
|
|
|
|
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
|
|
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
</div>
|
|
<script>
|
|
jQuery(function () {
|
|
SphinxRtdTheme.Navigation.enable(true);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html> |