diff --git a/.gitignore b/.gitignore index 15c61c8..11ec5ce 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json # Pyre type checker .pyre/ + +# data folder +data/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 1f6b817..d6df14c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM jupyter/base-notebook:2022-06-06 # Install system dependencies for computer vision packages USER root -RUN apt update && apt install -y libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 +RUN apt update && apt install -y gcc libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 tesseract-ocr USER $NB_USER # Copy the repository into the container diff --git a/misinformation/__init__.py b/misinformation/__init__.py index 523a655..1827151 100644 --- a/misinformation/__init__.py +++ b/misinformation/__init__.py @@ -5,5 +5,5 @@ from importlib import metadata __version__ = metadata.version(__package__) del metadata -from misinformation.faces import explore_face_recognition +from misinformation.display import explore_analysis from misinformation.utils import find_files diff --git a/misinformation/display.py b/misinformation/display.py new file mode 100644 index 0000000..587b62d --- /dev/null +++ b/misinformation/display.py @@ -0,0 +1,61 @@ +import ipywidgets +from IPython.display import display + +import misinformation.faces as faces +import misinformation.text as text + + +class JSONContainer: + """Expose a Python dictionary as a JSON document in JupyterLab + rich display rendering. + """ + + def __init__(self, data={}): + self._data = data + + def _repr_json_(self): + return self._data + + +def explore_analysis(image_paths, identify="faces"): + # dictionary mapping the type of analysis to be explored + identify_dict = { + "faces": faces.facial_expression_analysis, + "text-on-image": text.detect_text, + } + # Create an image selector widget + image_select = ipywidgets.Select( + options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20 ) + + # Set up the analysis output widget + output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%")) + + # Set up the image selection and display widget + image_widget = ipywidgets.Box( + children=[], + layout=ipywidgets.Layout(width="50%"), + ) + + # Register the tab switch logic + def switch(_): + # Clear existing output + image_widget.children = () + output.clear_output() + + # Create the new content + image_widget.children = (ipywidgets.Image.from_file(image_select.value),) + + # This output widget absorbs print statements that are messing with + # the widget output and cannot be disabled through the API.
+ with faces.NocatchOutput(): + analysis = identify_dict[identify](image_select.value) + with output: + display(JSONContainer(analysis)) + + # Register the handler and trigger it immediately + image_select.observe(switch, names=("value",), type="change") + switch(None) + + # Show the combined widget + return ipywidgets.HBox([image_select, image_widget, output]) diff --git a/misinformation/faces.py b/misinformation/faces.py index cddc383..c5560c1 100644 --- a/misinformation/faces.py +++ b/misinformation/faces.py @@ -1,13 +1,12 @@ import cv2 -import ipywidgets import numpy as np import os import pathlib +import ipywidgets from tensorflow.keras.models import load_model from tensorflow.keras.applications.mobilenet_v2 import preprocess_input from tensorflow.keras.preprocessing.image import img_to_array -from IPython.display import display from deepface import DeepFace from retinaface import RetinaFace @@ -148,18 +147,6 @@ def wears_mask(face): return bool(mask > withoutMask) -class JSONContainer: - """Expose a Python dictionary as a JSON document in JupyterLab - rich display rendering. - """ - - def __init__(self, data={}): - self._data = data - - def _repr_json_(self): - return self._data - - class NocatchOutput(ipywidgets.Output): """An output container that suppresses output, but not exceptions @@ -168,42 +155,3 @@ class NocatchOutput(ipywidgets.Output): def __exit__(self, *args, **kwargs): super().__exit__(*args, **kwargs) - - -def explore_face_recognition(image_paths): - # Create an image selector widget - image_select = ipywidgets.Select( - options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20 - ) - - # Set up the facial recognition output widget - output = NocatchOutput(layout=ipywidgets.Layout(width="30%")) - - # Set up the image selection and display widget - image_widget = ipywidgets.Box( - children=[], - layout=ipywidgets.Layout(width="50%"), - ) - - # Register the tab switch logic - def switch(_): - # Clear existing output - image_widget.children = () - output.clear_output() - - # Create the new content - image_widget.children = (ipywidgets.Image.from_file(image_select.value),) - - # This output widget absorbes print statements that are messing with - # the widget output and cannot be disabled through the API. 
- with NocatchOutput(): - analysis = facial_expression_analysis(image_select.value) - with output: - display(JSONContainer(analysis)) - - # Register the handler and trigger it immediately - image_select.observe(switch, names=("value",), type="change") - switch(None) - - # Show the combined widget - return ipywidgets.HBox([image_select, image_widget, output]) diff --git a/misinformation/text.py b/misinformation/text.py new file mode 100644 index 0000000..bf71d14 --- /dev/null +++ b/misinformation/text.py @@ -0,0 +1,26 @@ +from google.cloud import vision +import io + + +def detect_text(path): + """Detects text in the file.""" + + client = vision.ImageAnnotatorClient() + + with io.open(path, "rb") as image_file: + content = image_file.read() + + image = vision.Image(content=content) + + response = client.text_detection(image=image) + texts = response.text_annotations + result = {"text": []} + for text in texts: + result["text"].append(text.description) + + if response.error.message: + raise Exception( + "{}\nFor more info on error messages, check: " + "https://cloud.google.com/apis/design/errors".format(response.error.message) + ) + return result diff --git a/notebooks/facial_expressions.ipynb b/notebooks/facial_expressions.ipynb index 3262f0a..0485055 100644 --- a/notebooks/facial_expressions.ipynb +++ b/notebooks/facial_expressions.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "949d9f00-b129-477a-bc1d-e68fed73af2d", + "id": "a2bd2153", "metadata": {}, "source": [ "We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:" @@ -77,8 +77,16 @@ "metadata": {}, "outputs": [], "source": [ - "misinformation.explore_face_recognition(images)" + "misinformation.explore_analysis(images, identify=\"faces\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b99f3f1d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -97,7 +105,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/notebooks/get-text-from-image.ipynb b/notebooks/get-text-from-image.ipynb new file mode 100644 index 0000000..d69f1df --- /dev/null +++ b/notebooks/get-text-from-image.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dcaa3da1", + "metadata": {}, + "source": [ + "# Notebook for text extraction on image\n", + "Inga Ulusoy, SSC, July 2022" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf362e60", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from IPython.display import Image, display\n", + "import misinformation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6da3a7aa", + "metadata": {}, + "outputs": [], + "source": [ + "images = misinformation.find_files(limit=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf811ce0", + "metadata": {}, + "outputs": [], + "source": [ + "for i in images:\n", + " display(Image(filename=i))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366e2060", + "metadata": {}, + "outputs": [], + "source": [ + "# start with only English\n", + "mysubfiles = [i for i in images if \"eng\" in i]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b330b267", + "metadata": {}, + "outputs": [], + "source": [ + "for i in mysubfiles:\n", + " display(Image(filename=i))" + ] + }, + { + 
"cell_type": "markdown", + "id": "07b7a7a3", + "metadata": {}, + "source": [ + "# Pre-process the images: Convert to greyscale and increase contrast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cacfb0d", + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c94b02cb-9e96-4812-8448-8bc731bfd8aa", + "metadata": {}, + "outputs": [], + "source": [ + "! pip install matplotlib\n", + "! pip install numpy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a2d3057", + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(filename):\n", + " \"\"\"Preprocess the image to enhance features for extraction.\"\"\"\n", + " image = cv2.imread(filename)\n", + " # preserve the original image\n", + " original = image.copy()\n", + " # Grayscale, Gaussian blur, Otsu's threshold\n", + " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", + " # sharpen contrast by first smoothing and then substracting the smoothed and thresholded version\n", + " sharpened = unsharp_mask(gray, amount=1.1, threshold=0.1)\n", + " inverted = invert_image(sharpened)\n", + " return gray, sharpened, inverted\n", + "\n", + "\n", + "# use unsharp mask algorithm from opencv\n", + "# https://docs.opencv.org/4.x/d1/d10/classcv_1_1MatExpr.html#details\n", + "def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):\n", + " \"\"\"Return a sharpened version of the image, using an unsharp mask.\n", + " Amount: 1 is neutral, higher values result in shaprer images. threshold is the value below which the difference between blurred and original image gets discarded.\"\"\"\n", + " blurred = cv2.GaussianBlur(image, kernel_size, sigma)\n", + " sharpened = float(amount + 1) * image - float(amount) * blurred\n", + " sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))\n", + " sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))\n", + " sharpened = sharpened.round().astype(np.uint8)\n", + " if threshold > 0:\n", + " low_contrast_mask = np.absolute(image - blurred) < threshold\n", + " np.copyto(sharpened, image, where=low_contrast_mask)\n", + " return sharpened\n", + "\n", + "\n", + "def invert_image(image):\n", + " return cv2.bitwise_not(image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a483868b", + "metadata": {}, + "outputs": [], + "source": [ + "grey_image = []\n", + "for i in mysubfiles:\n", + " grey_image.append(preprocess(i))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08ed750d", + "metadata": {}, + "outputs": [], + "source": [ + "for image in grey_image:\n", + " # disable default colormap in imshow\n", + " plt.imshow(image[0], cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.imshow(image[1], cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "71ac2229", + "metadata": {}, + "source": [ + "mabe further preprocess in cropping out text regions..?" 
+ ] + }, + { + "cell_type": "markdown", + "id": "7786d09c", + "metadata": {}, + "source": [ + "# Try out different libraries\n", + "## The standard go-to tool that is slightly complicated: pytesseract\n", + "Install tesseract and the language libraries:\n", + "```\n", + "sudo apt install tesseract-ocr \n", + "sudo apt install tesseract-ocr-all \n", + "sudo apt install imagemagick \n", + "``` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d69504c", + "metadata": {}, + "outputs": [], + "source": [ + "from pytesseract import pytesseract\n", + "\n", + "pytesseract.tesseract_cmd = r\"tesseract\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b27c98c-b437-4c8b-8844-96d8718eea49", + "metadata": {}, + "outputs": [], + "source": [ + "! pip install pytesseract" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "529de8d8", + "metadata": {}, + "outputs": [], + "source": [ + "myimage = grey_image[1]\n", + "plt.imshow(myimage[0], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "plt.imshow(myimage[1], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "plt.imshow(myimage[2], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "text = pytesseract.image_to_string(myimage[0])\n", + "print(text)\n", + "text = pytesseract.image_to_string(myimage[1])\n", + "print(text)\n", + "text = pytesseract.image_to_string(myimage[2])\n", + "print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "e25dd39e", + "metadata": {}, + "source": [ + "Here we would probably need to compare the extraction from the different preprocessed images and merge the results into a final text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36495f3f", + "metadata": {}, + "outputs": [], + "source": [ + "for image in mysubfiles:\n", + " # Loading image using OpenCV\n", + " img = cv2.imread(image)\n", + "\n", + " # Preprocessing image\n", + " # Converting to grayscale\n", + " gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", + "\n", + " # creating a binary image by selecting a proper threshold\n", + " binary_image = cv2.threshold(\n", + " gray_image, 130, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU\n", + " )[1]\n", + "\n", + " # Inverting the image\n", + " inverted_bin = cv2.bitwise_not(binary_image)\n", + "\n", + " # Some noise reduction\n", + " kernel = np.ones((2, 2), np.uint8)\n", + " processed_img = cv2.erode(inverted_bin, kernel, iterations=1)\n", + " processed_img = cv2.dilate(processed_img, kernel, iterations=1)\n", + "\n", + " # Applying image_to_string method\n", + " text = pytesseract.image_to_string(processed_img)\n", + " plt.imshow(processed_img, cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.show()\n", + " print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "d6532019", + "metadata": {}, + "source": [ + "## keras-ocr\n", + "Not sure how to create an image object without a url.\n", + "https://keras-ocr.readthedocs.io/en/latest/examples/using_pretrained_models.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44e38871", + "metadata": {}, + "outputs": [], + "source": [ + "import keras_ocr\n", + "\n", + "pipeline = keras_ocr.pipeline.Pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bb55068-ddd4-4b90-ae94-90181980d3c0", + "metadata": {}, + "outputs": [], + "source": [ + "! 
pip install keras-ocr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0002f2c4", + "metadata": {}, + "outputs": [], + "source": [ + "images = [\n", + " keras_ocr.tools.read(url)\n", + " for url in [\n", + " \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-1.jpg\",\n", + " \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-2.png\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1567dc85", + "metadata": {}, + "outputs": [], + "source": [ + "prediction_groups = pipeline.recognize(images)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb1ca152", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_image_1 = prediction_groups[0]\n", + "for text, box in predicted_image_1:\n", + " print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "7b8b929f", + "metadata": {}, + "source": [ + "## google cloud vision API\n", + "First 1000 images per month are free." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c6ecc88", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\n", + " \"GOOGLE_APPLICATION_CREDENTIALS\"\n", + "] = \"/home/inga/projects/misinformation-project/misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n", + "images = mysubfiles[1:5]\n", + "misinformation.explore_analysis(images, identify=\"text-on-image\")" + ] + }, + { + "cell_type": "markdown", + "id": "d54407ad", + "metadata": {}, + "source": [ + "## MS Azure\n", + "https://docs.microsoft.com/en-us/azure/search/cognitive-search-concept-image-scenarios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "365c78b1-7ff4-4213-86fa-6a0a2d05198f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 5a39586..2ac6cc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "ipywidgets ==8.0.0rc1", "pooch", "retina-face", + "google-cloud-vision", ] [project.scripts] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..50fc51c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +deepface +ipywidgets==8.0.0rc1 +pooch +retina-face +opencv-python +matplotlib +numpy +keras-ocr +tensorflow +google-cloud-vision \ No newline at end of file
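Usage sketch (not part of the patch): a minimal example of how the new entry points added above could be exercised together, assuming image files live in a local folder such as the ignored data/ directory and that a valid Google Cloud service-account key is available; the limit value and both paths below are placeholders.

# Minimal usage sketch for the new analysis entry points; assumptions noted above.
import os

import misinformation
from misinformation.text import detect_text

# Collect image files; find_files is re-exported from misinformation.utils.
images = misinformation.find_files(limit=10)

# Interactive exploration inside Jupyter, with identify="faces" or "text-on-image".
misinformation.explore_analysis(images, identify="faces")

# Non-interactive text extraction through the Google Cloud Vision API.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/credentials.json"  # placeholder
print(detect_text(images[0])["text"])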