Mirror of
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 13:06:04 +02:00
Text on image 1 (#4)
* read image into nb
* test
* added keras-ocr and google vision
* google cloud vision by far the best
* setting up docker for text 1
* move widgets and analysis to display module
* move widgets and analysis to display module - 2
* text on image through widgets
This commit is contained in:
parent
7aa1247766
commit
446da693e3
3  .gitignore (vendored)
@@ -129,3 +129,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
+
+# data folder
+data/
Dockerfile
@@ -2,7 +2,7 @@ FROM jupyter/base-notebook:2022-06-06
 # Install system dependencies for computer vision packages
 USER root
-RUN apt update && apt install -y libgl1 libglib2.0-0 libsm6 libxrender1 libxext6
+RUN apt update && apt install -y gcc libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 tesseract-ocr
 USER $NB_USER

 # Copy the repository into the container
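The added gcc and tesseract-ocr packages back the pytesseract route explored in the notebook below. A quick sanity check from inside the container (a sketch; shutil is stdlib, nothing extra assumed):

    import shutil

    # tesseract-ocr provides the tesseract binary that pytesseract shells out to
    print(shutil.which("tesseract"))  # expected: a path such as /usr/bin/tesseract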
misinformation/__init__.py
@@ -5,5 +5,5 @@ from importlib import metadata
 __version__ = metadata.version(__package__)
 del metadata

-from misinformation.faces import explore_face_recognition
+from misinformation.display import explore_analysis
 from misinformation.utils import find_files
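This swaps the package's public entry point: callers that used explore_face_recognition now go through the generic explore_analysis, as the notebook change further down also shows. A migration sketch (assumes images is a list of image paths, e.g. from find_files):

    import misinformation

    # old: misinformation.explore_face_recognition(images)
    misinformation.explore_analysis(images, identify="faces")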
61  misinformation/display.py (new file)
@@ -0,0 +1,61 @@
+import ipywidgets
+from IPython.display import display
+
+import misinformation.faces as faces
+import misinformation.text as text
+
+
+class JSONContainer:
+    """Expose a Python dictionary as a JSON document in JupyterLab
+    rich display rendering.
+    """
+
+    def __init__(self, data={}):
+        self._data = data
+
+    def _repr_json_(self):
+        return self._data
+
+
+def explore_analysis(image_paths, identify="faces"):
+    # dictionary mapping the type of analysis to be explored
+    identify_dict = {
+        "faces": faces.facial_expression_analysis,
+        "text-on-image": text.detect_text,
+    }
+    # Create an image selector widget
+    image_select = ipywidgets.Select(
+        options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20
+    )
+
+    # Set up the analysis output widget
+    output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%"))
+
+    # Set up the image selection and display widget
+    image_widget = ipywidgets.Box(
+        children=[],
+        layout=ipywidgets.Layout(width="50%"),
+    )
+
+    # Register the tab switch logic
+    def switch(_):
+        # Clear existing output
+        image_widget.children = ()
+        output.clear_output()
+
+        # Create the new content
+        image_widget.children = (ipywidgets.Image.from_file(image_select.value),)
+
+        # This output widget absorbs print statements that are messing with
+        # the widget output and cannot be disabled through the API.
+        with faces.NocatchOutput():
+            analysis = identify_dict[identify](image_select.value)
+        with output:
+            display(JSONContainer(analysis))
+
+    # Register the handler and trigger it immediately
+    image_select.observe(switch, names=("value",), type="change")
+    switch(None)
+
+    # Show the combined widget
+    return ipywidgets.HBox([image_select, image_widget, output])
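A minimal usage sketch for the new widget (run inside a Jupyter notebook; find_files and the limit argument come from the package, the identify choices from identify_dict above):

    import misinformation

    images = misinformation.find_files(limit=20)
    # "faces" runs facial_expression_analysis, "text-on-image" runs detect_text
    misinformation.explore_analysis(images, identify="text-on-image")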
misinformation/faces.py
@@ -1,13 +1,12 @@
 import cv2
-import ipywidgets
 import numpy as np
 import os
 import pathlib
+import ipywidgets

 from tensorflow.keras.models import load_model
 from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
 from tensorflow.keras.preprocessing.image import img_to_array
-from IPython.display import display
 from deepface import DeepFace
 from retinaface import RetinaFace

@@ -148,18 +147,6 @@ def wears_mask(face):
     return bool(mask > withoutMask)
-
-
-class JSONContainer:
-    """Expose a Python dictionary as a JSON document in JupyterLab
-    rich display rendering.
-    """
-
-    def __init__(self, data={}):
-        self._data = data
-
-    def _repr_json_(self):
-        return self._data


 class NocatchOutput(ipywidgets.Output):
     """An output container that suppresses output, but not exceptions

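JSONContainer moves to display.py unchanged; its _repr_json_ hook is what lets JupyterLab render the analysis dictionary as a collapsible JSON tree. A stripped-down sketch of that mechanism (AsJSON is a hypothetical stand-in; any object exposing _repr_json_ works):

    from IPython.display import display

    class AsJSON:
        def __init__(self, data):
            self._data = data

        def _repr_json_(self):
            # IPython picks this up as the rich JSON representation
            return self._data

    display(AsJSON({"wears_mask": True}))  # renders as JSON in JupyterLab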
@@ -168,42 +155,3 @@ class NocatchOutput(ipywidgets.Output):

     def __exit__(self, *args, **kwargs):
         super().__exit__(*args, **kwargs)
-
-
-def explore_face_recognition(image_paths):
-    # Create an image selector widget
-    image_select = ipywidgets.Select(
-        options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20
-    )
-
-    # Set up the facial recognition output widget
-    output = NocatchOutput(layout=ipywidgets.Layout(width="30%"))
-
-    # Set up the image selection and display widget
-    image_widget = ipywidgets.Box(
-        children=[],
-        layout=ipywidgets.Layout(width="50%"),
-    )
-
-    # Register the tab switch logic
-    def switch(_):
-        # Clear existing output
-        image_widget.children = ()
-        output.clear_output()
-
-        # Create the new content
-        image_widget.children = (ipywidgets.Image.from_file(image_select.value),)
-
-        # This output widget absorbs print statements that are messing with
-        # the widget output and cannot be disabled through the API.
-        with NocatchOutput():
-            analysis = facial_expression_analysis(image_select.value)
-        with output:
-            display(JSONContainer(analysis))
-
-    # Register the handler and trigger it immediately
-    image_select.observe(switch, names=("value",), type="change")
-    switch(None)
-
-    # Show the combined widget
-    return ipywidgets.HBox([image_select, image_widget, output])
26  misinformation/text.py (new file)
@@ -0,0 +1,26 @@
+from google.cloud import vision
+import io
+
+
+def detect_text(path):
+    """Detects text in the file."""
+
+    client = vision.ImageAnnotatorClient()
+
+    with io.open(path, "rb") as image_file:
+        content = image_file.read()
+
+    image = vision.Image(content=content)
+
+    response = client.text_detection(image=image)
+    texts = response.text_annotations
+    result = {"text": []}
+    for text in texts:
+        result["text"].append(text.description)
+
+    if response.error.message:
+        raise Exception(
+            "{}\nFor more info on error messages, check: "
+            "https://cloud.google.com/apis/design/errors".format(response.error.message)
+        )
+    return result
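A call sketch for detect_text (the credentials path and input image are placeholders; the Vision client reads the key from GOOGLE_APPLICATION_CREDENTIALS, exactly as the notebook below sets it):

    import os
    from misinformation.text import detect_text

    # point the client at a service-account key (placeholder path)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
    result = detect_text("data/example-eng.png")  # hypothetical input image
    print(result["text"])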
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "949d9f00-b129-477a-bc1d-e68fed73af2d",
+   "id": "a2bd2153",
    "metadata": {},
    "source": [
     "We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:"
@@ -77,8 +77,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "misinformation.explore_face_recognition(images)"
+    "misinformation.explore_analysis(images, identify=\"faces\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b99f3f1d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
@@ -97,7 +105,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.10.4"
   }
  },
 "nbformat": 4,
414  notebooks/get-text-from-image.ipynb (new file)
@@ -0,0 +1,414 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "dcaa3da1",
+   "metadata": {},
+   "source": [
+    "# Notebook for text extraction on image\n",
+    "Inga Ulusoy, SSC, July 2022"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf362e60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from IPython.display import Image, display\n",
+    "import misinformation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6da3a7aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = misinformation.find_files(limit=1000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf811ce0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in images:\n",
+    "    display(Image(filename=i))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "366e2060",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start with only English\n",
+    "mysubfiles = [i for i in images if \"eng\" in i]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b330b267",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in mysubfiles:\n",
+    "    display(Image(filename=i))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07b7a7a3",
+   "metadata": {},
+   "source": [
+    "# Pre-process the images: Convert to greyscale and increase contrast"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cacfb0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import cv2\n",
+    "from matplotlib import pyplot as plt\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c94b02cb-9e96-4812-8448-8bc731bfd8aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install matplotlib\n",
+    "! pip install numpy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8a2d3057",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def preprocess(filename):\n",
+    "    \"\"\"Preprocess the image to enhance features for extraction.\"\"\"\n",
+    "    image = cv2.imread(filename)\n",
+    "    # preserve the original image\n",
+    "    original = image.copy()\n",
+    "    # Grayscale, Gaussian blur, Otsu's threshold\n",
+    "    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
+    "    # sharpen contrast by first smoothing and then subtracting the smoothed and thresholded version\n",
+    "    sharpened = unsharp_mask(gray, amount=1.1, threshold=0.1)\n",
+    "    inverted = invert_image(sharpened)\n",
+    "    return gray, sharpened, inverted\n",
+    "\n",
+    "\n",
+    "# use unsharp mask algorithm from opencv\n",
+    "# https://docs.opencv.org/4.x/d1/d10/classcv_1_1MatExpr.html#details\n",
+    "def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):\n",
+    "    \"\"\"Return a sharpened version of the image, using an unsharp mask.\n",
+    "    Amount: 1 is neutral, higher values result in sharper images. threshold is the value below which the difference between blurred and original image gets discarded.\"\"\"\n",
+    "    blurred = cv2.GaussianBlur(image, kernel_size, sigma)\n",
+    "    sharpened = float(amount + 1) * image - float(amount) * blurred\n",
+    "    sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))\n",
+    "    sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))\n",
+    "    sharpened = sharpened.round().astype(np.uint8)\n",
+    "    if threshold > 0:\n",
+    "        low_contrast_mask = np.absolute(image - blurred) < threshold\n",
+    "        np.copyto(sharpened, image, where=low_contrast_mask)\n",
+    "    return sharpened\n",
+    "\n",
+    "\n",
+    "def invert_image(image):\n",
+    "    return cv2.bitwise_not(image)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a483868b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "grey_image = []\n",
+    "for i in mysubfiles:\n",
+    "    grey_image.append(preprocess(i))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08ed750d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for image in grey_image:\n",
+    "    # disable default colormap in imshow\n",
+    "    plt.imshow(image[0], cmap=\"gray\", vmin=0, vmax=255)\n",
+    "    plt.imshow(image[1], cmap=\"gray\", vmin=0, vmax=255)\n",
+    "    plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71ac2229",
+   "metadata": {},
+   "source": [
+    "Maybe preprocess further by cropping out text regions?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7786d09c",
+   "metadata": {},
+   "source": [
+    "# Try out different libraries\n",
+    "## The standard go-to tool that is slightly complicated: pytesseract\n",
+    "Install tesseract and the language libraries:\n",
+    "```\n",
+    "sudo apt install tesseract-ocr\n",
+    "sudo apt install tesseract-ocr-all\n",
+    "sudo apt install imagemagick\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0d69504c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pytesseract import pytesseract\n",
+    "\n",
+    "pytesseract.tesseract_cmd = r\"tesseract\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b27c98c-b437-4c8b-8844-96d8718eea49",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install pytesseract"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "529de8d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "myimage = grey_image[1]\n",
+    "plt.imshow(myimage[0], cmap=\"gray\", vmin=0, vmax=255)\n",
+    "plt.show()\n",
+    "\n",
+    "plt.imshow(myimage[1], cmap=\"gray\", vmin=0, vmax=255)\n",
+    "plt.show()\n",
+    "\n",
+    "plt.imshow(myimage[2], cmap=\"gray\", vmin=0, vmax=255)\n",
+    "plt.show()\n",
+    "\n",
+    "text = pytesseract.image_to_string(myimage[0])\n",
+    "print(text)\n",
+    "text = pytesseract.image_to_string(myimage[1])\n",
+    "print(text)\n",
+    "text = pytesseract.image_to_string(myimage[2])\n",
+    "print(text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e25dd39e",
+   "metadata": {},
+   "source": [
+    "Here we would probably need to compare the extraction from the differently preprocessed images and overlay them into a final text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36495f3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for image in mysubfiles:\n",
+    "    # Loading image using OpenCV\n",
+    "    img = cv2.imread(image)\n",
+    "\n",
+    "    # Preprocessing image\n",
+    "    # Converting to grayscale\n",
+    "    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
+    "\n",
+    "    # creating Binary image by selecting proper threshold\n",
+    "    binary_image = cv2.threshold(\n",
+    "        gray_image, 130, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU\n",
+    "    )[1]\n",
+    "\n",
+    "    # Inverting the image\n",
+    "    inverted_bin = cv2.bitwise_not(binary_image)\n",
+    "\n",
+    "    # Some noise reduction\n",
+    "    kernel = np.ones((2, 2), np.uint8)\n",
+    "    processed_img = cv2.erode(inverted_bin, kernel, iterations=1)\n",
+    "    processed_img = cv2.dilate(processed_img, kernel, iterations=1)\n",
+    "\n",
+    "    # Applying image_to_string method\n",
+    "    text = pytesseract.image_to_string(processed_img)\n",
+    "    plt.imshow(processed_img, cmap=\"gray\", vmin=0, vmax=255)\n",
+    "    plt.show()\n",
+    "    print(text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6532019",
+   "metadata": {},
+   "source": [
+    "## keras-ocr\n",
+    "Not sure how to create an image object without a url.\n",
+    "https://keras-ocr.readthedocs.io/en/latest/examples/using_pretrained_models.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44e38871",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras_ocr\n",
+    "\n",
+    "pipeline = keras_ocr.pipeline.Pipeline()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2bb55068-ddd4-4b90-ae94-90181980d3c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install keras-ocr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0002f2c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = [\n",
+    "    keras_ocr.tools.read(url)\n",
+    "    for url in [\n",
+    "        \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-1.jpg\",\n",
+    "        \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-2.png\",\n",
+    "    ]\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1567dc85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prediction_groups = pipeline.recognize(images)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb1ca152",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predicted_image_1 = prediction_groups[0]\n",
+    "for text, box in predicted_image_1:\n",
+    "    print(text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7b8b929f",
+   "metadata": {},
+   "source": [
+    "## google cloud vision API\n",
+    "First 1000 images per month are free."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c6ecc88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\n",
+    "    \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
+    "] = \"/home/inga/projects/misinformation-project/misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n",
+    "images = mysubfiles[1:5]\n",
+    "misinformation.explore_analysis(images, identify=\"text-on-image\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d54407ad",
+   "metadata": {},
+   "source": [
+    "## MS Azure\n",
+    "https://docs.microsoft.com/en-us/azure/search/cognitive-search-concept-image-scenarios"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "365c78b1-7ff4-4213-86fa-6a0a2d05198f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
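On the notebook's open keras-ocr question: keras_ocr.tools.read should accept local file paths as well as URLs, so the detour via hosted example images is likely avoidable (untested sketch, reusing mysubfiles from the notebook):

    import keras_ocr

    pipeline = keras_ocr.pipeline.Pipeline()
    # tools.read also takes local paths, not only URLs
    local_images = [keras_ocr.tools.read(p) for p in mysubfiles[:2]]
    prediction_groups = pipeline.recognize(local_images)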
pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "ipywidgets ==8.0.0rc1",
     "pooch",
     "retina-face",
+    "google-cloud-vision",
 ]

 [project.scripts]
10  requirements.txt (new file)
@@ -0,0 +1,10 @@
+deepface
+ipywidgets==8.0.0rc1
+pooch
+retina-face
+opencv-python
+matplotlib
+numpy
+keras-ocr
+tensorflow
+google-cloud-vision