* read image into nb

* test

* added keras-ocr and google vision

* google cloud vision by far the best

* setting up docker for text 1

* move widgets and analysis to display module

* move widgets and analysis to display module - 2

* text on image through widgets
This commit is contained in:
Inga Ulusoy 2022-07-27 22:41:13 +02:00, committed by GitHub
parent 7aa1247766
commit 446da693e3
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
10 changed files: 529 additions and 58 deletions

.gitignore

@@ -129,3 +129,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
+
+# data folder
+data/

Dockerfile

@@ -2,7 +2,7 @@ FROM jupyter/base-notebook:2022-06-06
 # Install system dependencies for computer vision packages
 USER root
-RUN apt update && apt install -y libgl1 libglib2.0-0 libsm6 libxrender1 libxext6
+RUN apt update && apt install -y gcc libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 tesseract-ocr
 USER $NB_USER
 # Copy the repository into the container

misinformation/__init__.py

@@ -5,5 +5,5 @@ from importlib import metadata
 __version__ = metadata.version(__package__)
 del metadata
-from misinformation.faces import explore_face_recognition
+from misinformation.display import explore_analysis
 from misinformation.utils import find_files

misinformation/display.py (new file)

@@ -0,0 +1,61 @@
import ipywidgets
from IPython.display import display
import misinformation.faces as faces
import misinformation.text as text


class JSONContainer:
    """Expose a Python dictionary as a JSON document in JupyterLab
    rich display rendering.
    """

    def __init__(self, data={}):
        self._data = data

    def _repr_json_(self):
        return self._data


def explore_analysis(image_paths, identify="faces"):
    # dictionary mapping the type of analysis to be explored
    identify_dict = {
        "faces": faces.facial_expression_analysis,
        "text-on-image": text.detect_text,
    }

    # Create an image selector widget
    image_select = ipywidgets.Select(
        options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20
    )

    # Set up the facial recognition output widget
    output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%"))

    # Set up the image selection and display widget
    image_widget = ipywidgets.Box(
        children=[],
        layout=ipywidgets.Layout(width="50%"),
    )

    # Register the tab switch logic
    def switch(_):
        # Clear existing output
        image_widget.children = ()
        output.clear_output()

        # Create the new content
        image_widget.children = (ipywidgets.Image.from_file(image_select.value),)

        # This output widget absorbs print statements that are messing with
        # the widget output and cannot be disabled through the API.
        with faces.NocatchOutput():
            analysis = identify_dict[identify](image_select.value)
        with output:
            display(JSONContainer(analysis))

    # Register the handler and trigger it immediately
    image_select.observe(switch, names=("value",), type="change")
    switch(None)

    # Show the combined widget
    return ipywidgets.HBox([image_select, image_widget, output])
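
For orientation, a minimal sketch of how the new explore_analysis widget is meant to be called from a notebook, mirroring the notebooks in this commit (the limit value and the availability of local image files are assumptions):

import misinformation

images = misinformation.find_files(limit=20)
misinformation.explore_analysis(images, identify="faces")           # facial expression analysis
misinformation.explore_analysis(images, identify="text-on-image")   # OCR via Google Cloud Vision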

misinformation/faces.py

@@ -1,13 +1,12 @@
 import cv2
-import ipywidgets
 import numpy as np
 import os
 import pathlib
+import ipywidgets
 from tensorflow.keras.models import load_model
 from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
 from tensorflow.keras.preprocessing.image import img_to_array
-from IPython.display import display
 from deepface import DeepFace
 from retinaface import RetinaFace
@@ -148,18 +147,6 @@ def wears_mask(face):
     return bool(mask > withoutMask)
 
 
-class JSONContainer:
-    """Expose a Python dictionary as a JSON document in JupyterLab
-    rich display rendering.
-    """
-
-    def __init__(self, data={}):
-        self._data = data
-
-    def _repr_json_(self):
-        return self._data
-
-
 class NocatchOutput(ipywidgets.Output):
     """An output container that suppresses output, but not exceptions
@@ -168,42 +155,3 @@ class NocatchOutput(ipywidgets.Output):
     def __exit__(self, *args, **kwargs):
         super().__exit__(*args, **kwargs)
-
-
-def explore_face_recognition(image_paths):
-    # Create an image selector widget
-    image_select = ipywidgets.Select(
-        options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20
-    )
-
-    # Set up the facial recognition output widget
-    output = NocatchOutput(layout=ipywidgets.Layout(width="30%"))
-
-    # Set up the image selection and display widget
-    image_widget = ipywidgets.Box(
-        children=[],
-        layout=ipywidgets.Layout(width="50%"),
-    )
-
-    # Register the tab switch logic
-    def switch(_):
-        # Clear existing output
-        image_widget.children = ()
-        output.clear_output()
-
-        # Create the new content
-        image_widget.children = (ipywidgets.Image.from_file(image_select.value),)
-
-        # This output widget absorbes print statements that are messing with
-        # the widget output and cannot be disabled through the API.
-        with NocatchOutput():
-            analysis = facial_expression_analysis(image_select.value)
-        with output:
-            display(JSONContainer(analysis))
-
-    # Register the handler and trigger it immediately
-    image_select.observe(switch, names=("value",), type="change")
-    switch(None)
-
-    # Show the combined widget
-    return ipywidgets.HBox([image_select, image_widget, output])

misinformation/text.py (new file)

@@ -0,0 +1,26 @@
from google.cloud import vision
import io


def detect_text(path):
    """Detects text in the file."""
    client = vision.ImageAnnotatorClient()

    with io.open(path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations

    result = {"text": []}
    for text in texts:
        result["text"].append(text.description)

    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    return result
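
As a usage sketch of detect_text (the credentials path and the image file are placeholders; a Google Cloud service account with the Vision API enabled is assumed):

import os
from misinformation.text import detect_text

# point the client at a service account key file (path is illustrative)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/service-account-key.json"

result = detect_text("data/example-image.png")
print(result["text"])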

facial expression exploration notebook (.ipynb)

@@ -28,7 +28,7 @@
 },
 {
  "cell_type": "markdown",
- "id": "949d9f00-b129-477a-bc1d-e68fed73af2d",
+ "id": "a2bd2153",
  "metadata": {},
  "source": [
   "We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:"
@@ -77,8 +77,16 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "misinformation.explore_face_recognition(images)"
+  "misinformation.explore_analysis(images, identify=\"faces\")"
  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "b99f3f1d",
+  "metadata": {},
+  "outputs": [],
+  "source": []
 }
 ],
 "metadata": {
@@ -97,7 +105,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.10.5"
+ "version": "3.10.4"
 }
 },
 "nbformat": 4,

notebooks/get-text-from-image.ipynb (new file)

@@ -0,0 +1,414 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "dcaa3da1",
"metadata": {},
"source": [
"# Notebook for text extraction on image\n",
"Inga Ulusoy, SSC, July 2022"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf362e60",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from IPython.display import Image, display\n",
"import misinformation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6da3a7aa",
"metadata": {},
"outputs": [],
"source": [
"images = misinformation.find_files(limit=1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf811ce0",
"metadata": {},
"outputs": [],
"source": [
"for i in images:\n",
" display(Image(filename=i))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "366e2060",
"metadata": {},
"outputs": [],
"source": [
"# start with only English\n",
"mysubfiles = [i for i in images if \"eng\" in i]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b330b267",
"metadata": {},
"outputs": [],
"source": [
"for i in mysubfiles:\n",
" display(Image(filename=i))"
]
},
{
"cell_type": "markdown",
"id": "07b7a7a3",
"metadata": {},
"source": [
"# Pre-process the images: Convert to greyscale and increase contrast"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cacfb0d",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c94b02cb-9e96-4812-8448-8bc731bfd8aa",
"metadata": {},
"outputs": [],
"source": [
"! pip install matplotlib\n",
"! pip install numpy"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a2d3057",
"metadata": {},
"outputs": [],
"source": [
"def preprocess(filename):\n",
" \"\"\"Preprocess the image to enhance features for extraction.\"\"\"\n",
" image = cv2.imread(filename)\n",
" # preserve the original image\n",
" original = image.copy()\n",
" # Grayscale, Gaussian blur, Otsu's threshold\n",
" gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
" # sharpen contrast by first smoothing and then substracting the smoothed and thresholded version\n",
" sharpened = unsharp_mask(gray, amount=1.1, threshold=0.1)\n",
" inverted = invert_image(sharpened)\n",
" return gray, sharpened, inverted\n",
"\n",
"\n",
"# use unsharp mask algorithm from opencv\n",
"# https://docs.opencv.org/4.x/d1/d10/classcv_1_1MatExpr.html#details\n",
"def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):\n",
" \"\"\"Return a sharpened version of the image, using an unsharp mask.\n",
" Amount: 1 is neutral, higher values result in shaprer images. threshold is the value below which the difference between blurred and original image gets discarded.\"\"\"\n",
" blurred = cv2.GaussianBlur(image, kernel_size, sigma)\n",
" sharpened = float(amount + 1) * image - float(amount) * blurred\n",
" sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))\n",
" sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))\n",
" sharpened = sharpened.round().astype(np.uint8)\n",
" if threshold > 0:\n",
" low_contrast_mask = np.absolute(image - blurred) < threshold\n",
" np.copyto(sharpened, image, where=low_contrast_mask)\n",
" return sharpened\n",
"\n",
"\n",
"def invert_image(image):\n",
" return cv2.bitwise_not(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a483868b",
"metadata": {},
"outputs": [],
"source": [
"grey_image = []\n",
"for i in mysubfiles:\n",
" grey_image.append(preprocess(i))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08ed750d",
"metadata": {},
"outputs": [],
"source": [
"for image in grey_image:\n",
" # disable default colormap in imshow\n",
" plt.imshow(image[0], cmap=\"gray\", vmin=0, vmax=255)\n",
" plt.imshow(image[1], cmap=\"gray\", vmin=0, vmax=255)\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"id": "71ac2229",
"metadata": {},
"source": [
"mabe further preprocess in cropping out text regions..?"
]
},
{
"cell_type": "markdown",
"id": "7786d09c",
"metadata": {},
"source": [
"# Try out different libraries\n",
"## The standard go-to tool that is slightly complicated: pytesseract\n",
"Install tesseract and the language libraries:\n",
"```\n",
"sudo apt install tesseract-ocr \n",
"sudo apt install tesseract-ocr-all \n",
"sudo apt install imagemagick \n",
"``` "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d69504c",
"metadata": {},
"outputs": [],
"source": [
"from pytesseract import pytesseract\n",
"\n",
"pytesseract.tesseract_cmd = r\"tesseract\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b27c98c-b437-4c8b-8844-96d8718eea49",
"metadata": {},
"outputs": [],
"source": [
"! pip install pytesseract"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "529de8d8",
"metadata": {},
"outputs": [],
"source": [
"myimage = grey_image[1]\n",
"plt.imshow(myimage[0], cmap=\"gray\", vmin=0, vmax=255)\n",
"plt.show()\n",
"\n",
"plt.imshow(myimage[1], cmap=\"gray\", vmin=0, vmax=255)\n",
"plt.show()\n",
"\n",
"plt.imshow(myimage[2], cmap=\"gray\", vmin=0, vmax=255)\n",
"plt.show()\n",
"\n",
"text = pytesseract.image_to_string(myimage[0])\n",
"print(text)\n",
"text = pytesseract.image_to_string(myimage[1])\n",
"print(text)\n",
"text = pytesseract.image_to_string(myimage[2])\n",
"print(text)"
]
},
{
"cell_type": "markdown",
"id": "e25dd39e",
"metadata": {},
"source": [
"Here we probably would need to compare extractopm from different preprocessed images and overlay in a final text."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36495f3f",
"metadata": {},
"outputs": [],
"source": [
"for image in mysubfiles:\n",
" # Loading image using OpenCV\n",
" img = cv2.imread(image)\n",
"\n",
" # Preprocessing image\n",
" # Converting to grayscale\n",
" gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" # creating Binary image by selecting proper threshold\n",
" binary_image = cv2.threshold(\n",
" gray_image, 130, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU\n",
" )[1]\n",
"\n",
" # Inverting the image\n",
" inverted_bin = cv2.bitwise_not(binary_image)\n",
"\n",
" # Some noise reduction\n",
" kernel = np.ones((2, 2), np.uint8)\n",
" processed_img = cv2.erode(inverted_bin, kernel, iterations=1)\n",
" processed_img = cv2.dilate(processed_img, kernel, iterations=1)\n",
"\n",
" # Applying image_to_string method\n",
" text = pytesseract.image_to_string(processed_img)\n",
" plt.imshow(processed_img, cmap=\"gray\", vmin=0, vmax=255)\n",
" plt.show()\n",
" print(text)"
]
},
{
"cell_type": "markdown",
"id": "d6532019",
"metadata": {},
"source": [
"## keras-ocr\n",
"Not sure how to create an image object without a url.\n",
"https://keras-ocr.readthedocs.io/en/latest/examples/using_pretrained_models.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44e38871",
"metadata": {},
"outputs": [],
"source": [
"import keras_ocr\n",
"\n",
"pipeline = keras_ocr.pipeline.Pipeline()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2bb55068-ddd4-4b90-ae94-90181980d3c0",
"metadata": {},
"outputs": [],
"source": [
"! pip install keras-ocr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0002f2c4",
"metadata": {},
"outputs": [],
"source": [
"images = [\n",
" keras_ocr.tools.read(url)\n",
" for url in [\n",
" \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-1.jpg\",\n",
" \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-2.png\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1567dc85",
"metadata": {},
"outputs": [],
"source": [
"prediction_groups = pipeline.recognize(images)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb1ca152",
"metadata": {},
"outputs": [],
"source": [
"predicted_image_1 = prediction_groups[0]\n",
"for text, box in predicted_image_1:\n",
" print(text)"
]
},
{
"cell_type": "markdown",
"id": "7b8b929f",
"metadata": {},
"source": [
"## google cloud vision API\n",
"First 1000 images per month are free."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c6ecc88",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\n",
" \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
"] = \"/home/inga/projects/misinformation-project/misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n",
"images = mysubfiles[1:5]\n",
"misinformation.explore_analysis(images, identify=\"text-on-image\")"
]
},
{
"cell_type": "markdown",
"id": "d54407ad",
"metadata": {},
"source": [
"## MS Azure\n",
"https://docs.microsoft.com/en-us/azure/search/cognitive-search-concept-image-scenarios"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "365c78b1-7ff4-4213-86fa-6a0a2d05198f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
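
Regarding the keras-ocr question raised in the notebook above (creating an image object without a URL): keras_ocr.tools.read should also accept a local file path, so a sketch along these lines may work. This is an untested assumption; mysubfiles stands in for the list of local image paths defined in the notebook.

import keras_ocr

pipeline = keras_ocr.pipeline.Pipeline()
# read local files instead of URLs (keras_ocr.tools.read accepts file paths as well)
local_images = [keras_ocr.tools.read(path) for path in mysubfiles]
prediction_groups = pipeline.recognize(local_images)
for text, box in prediction_groups[0]:
    print(text)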

pyproject.toml

@@ -25,6 +25,7 @@ dependencies = [
     "ipywidgets ==8.0.0rc1",
     "pooch",
     "retina-face",
+    "google-cloud-vision",
 ]
 
 [project.scripts]

requirements.txt (new file)

@@ -0,0 +1,10 @@
deepface
ipywidgets==8.0.0rc1
pooch
retina-face
opencv-python
matplotlib
numpy
keras-ocr
tensorflow
google-cloud-vision