diff --git a/.gitignore b/.gitignore index 15c61c8..11ec5ce 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json # Pyre type checker .pyre/ + +# data folder +data/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 1f6b817..d6df14c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM jupyter/base-notebook:2022-06-06 # Install system dependencies for computer vision packages USER root -RUN apt update && apt install -y libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 +RUN apt update && apt install -y gcc libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 tesseract-ocr USER $NB_USER # Copy the repository into the container diff --git a/misinformation/__init__.py b/misinformation/__init__.py index 523a655..1827151 100644 --- a/misinformation/__init__.py +++ b/misinformation/__init__.py @@ -5,5 +5,5 @@ from importlib import metadata __version__ = metadata.version(__package__) del metadata -from misinformation.faces import explore_face_recognition +from misinformation.display import explore_analysis from misinformation.utils import find_files diff --git a/misinformation/display.py b/misinformation/display.py new file mode 100644 index 0000000..587b62d --- /dev/null +++ b/misinformation/display.py @@ -0,0 +1,61 @@ +import ipywidgets +from IPython.display import display + +import misinformation.faces as faces +import misinformation.text as text + + +class JSONContainer: + """Expose a Python dictionary as a JSON document in JupyterLab + rich display rendering. + """ + + def __init__(self, data={}): + self._data = data + + def _repr_json_(self): + return self._data + + +def explore_analysis(image_paths, identify="faces"): + # dictionary mapping the type of analysis to be explored + identify_dict = { + "faces": faces.facial_expression_analysis, + "text-on-image": text.detect_text, + } + # Create an image selector widget + image_select = ipywidgets.Select( + options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20 ) + + # Set up the analysis output widget + output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%")) + + # Set up the image selection and display widget + image_widget = ipywidgets.Box( + children=[], + layout=ipywidgets.Layout(width="50%"), + ) + + # Register the tab switch logic + def switch(_): + # Clear existing output + image_widget.children = () + output.clear_output() + + # Create the new content + image_widget.children = (ipywidgets.Image.from_file(image_select.value),) + + # This output widget absorbs print statements that are messing with + # the widget output and cannot be disabled through the API.
+ with faces.NocatchOutput(): + analysis = identify_dict[identify](image_select.value) + with output: + display(JSONContainer(analysis)) + + # Register the handler and trigger it immediately + image_select.observe(switch, names=("value",), type="change") + switch(None) + + # Show the combined widget + return ipywidgets.HBox([image_select, image_widget, output]) diff --git a/misinformation/faces.py b/misinformation/faces.py index cddc383..c5560c1 100644 --- a/misinformation/faces.py +++ b/misinformation/faces.py @@ -1,13 +1,12 @@ import cv2 -import ipywidgets import numpy as np import os import pathlib +import ipywidgets from tensorflow.keras.models import load_model from tensorflow.keras.applications.mobilenet_v2 import preprocess_input from tensorflow.keras.preprocessing.image import img_to_array -from IPython.display import display from deepface import DeepFace from retinaface import RetinaFace @@ -148,18 +147,6 @@ def wears_mask(face): return bool(mask > withoutMask) -class JSONContainer: - """Expose a Python dictionary as a JSON document in JupyterLab - rich display rendering. - """ - - def __init__(self, data={}): - self._data = data - - def _repr_json_(self): - return self._data - - class NocatchOutput(ipywidgets.Output): """An output container that suppresses output, but not exceptions @@ -168,42 +155,3 @@ class NocatchOutput(ipywidgets.Output): def __exit__(self, *args, **kwargs): super().__exit__(*args, **kwargs) - - -def explore_face_recognition(image_paths): - # Create an image selector widget - image_select = ipywidgets.Select( - options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20 - ) - - # Set up the facial recognition output widget - output = NocatchOutput(layout=ipywidgets.Layout(width="30%")) - - # Set up the image selection and display widget - image_widget = ipywidgets.Box( - children=[], - layout=ipywidgets.Layout(width="50%"), - ) - - # Register the tab switch logic - def switch(_): - # Clear existing output - image_widget.children = () - output.clear_output() - - # Create the new content - image_widget.children = (ipywidgets.Image.from_file(image_select.value),) - - # This output widget absorbes print statements that are messing with - # the widget output and cannot be disabled through the API. 
- with NocatchOutput(): - analysis = facial_expression_analysis(image_select.value) - with output: - display(JSONContainer(analysis)) - - # Register the handler and trigger it immediately - image_select.observe(switch, names=("value",), type="change") - switch(None) - - # Show the combined widget - return ipywidgets.HBox([image_select, image_widget, output]) diff --git a/misinformation/text.py b/misinformation/text.py new file mode 100644 index 0000000..bf71d14 --- /dev/null +++ b/misinformation/text.py @@ -0,0 +1,26 @@ +from google.cloud import vision +import io + + +def detect_text(path): + """Detects text in the file.""" + + client = vision.ImageAnnotatorClient() + + with io.open(path, "rb") as image_file: + content = image_file.read() + + image = vision.Image(content=content) + + response = client.text_detection(image=image) + texts = response.text_annotations + result = {"text": []} + for text in texts: + result["text"].append(text.description) + + if response.error.message: + raise Exception( + "{}\nFor more info on error messages, check: " + "https://cloud.google.com/apis/design/errors".format(response.error.message) + ) + return result diff --git a/notebooks/facial_expressions.ipynb b/notebooks/facial_expressions.ipynb index 3262f0a..0485055 100644 --- a/notebooks/facial_expressions.ipynb +++ b/notebooks/facial_expressions.ipynb @@ -28,7 +28,7 @@ }, { "cell_type": "markdown", - "id": "949d9f00-b129-477a-bc1d-e68fed73af2d", + "id": "a2bd2153", "metadata": {}, "source": [ "We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:" @@ -77,8 +77,16 @@ "metadata": {}, "outputs": [], "source": [ - "misinformation.explore_face_recognition(images)" + "misinformation.explore_analysis(images, identify=\"faces\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b99f3f1d", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -97,7 +105,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/notebooks/get-text-from-image.ipynb b/notebooks/get-text-from-image.ipynb new file mode 100644 index 0000000..d69f1df --- /dev/null +++ b/notebooks/get-text-from-image.ipynb @@ -0,0 +1,414 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dcaa3da1", + "metadata": {}, + "source": [ + "# Notebook for text extraction on image\n", + "Inga Ulusoy, SSC, July 2022" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf362e60", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from IPython.display import Image, display\n", + "import misinformation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6da3a7aa", + "metadata": {}, + "outputs": [], + "source": [ + "images = misinformation.find_files(limit=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf811ce0", + "metadata": {}, + "outputs": [], + "source": [ + "for i in images:\n", + " display(Image(filename=i))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366e2060", + "metadata": {}, + "outputs": [], + "source": [ + "# start with only English\n", + "mysubfiles = [i for i in images if \"eng\" in i]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b330b267", + "metadata": {}, + "outputs": [], + "source": [ + "for i in mysubfiles:\n", + " display(Image(filename=i))" + ] + }, + { + 
"cell_type": "markdown", + "id": "07b7a7a3", + "metadata": {}, + "source": [ + "# Pre-process the images: Convert to greyscale and increase contrast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cacfb0d", + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "from matplotlib import pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c94b02cb-9e96-4812-8448-8bc731bfd8aa", + "metadata": {}, + "outputs": [], + "source": [ + "! pip install matplotlib\n", + "! pip install numpy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a2d3057", + "metadata": {}, + "outputs": [], + "source": [ + "def preprocess(filename):\n", + " \"\"\"Preprocess the image to enhance features for extraction.\"\"\"\n", + " image = cv2.imread(filename)\n", + " # preserve the original image\n", + " original = image.copy()\n", + " # Grayscale, Gaussian blur, Otsu's threshold\n", + " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", + " # sharpen contrast by first smoothing and then substracting the smoothed and thresholded version\n", + " sharpened = unsharp_mask(gray, amount=1.1, threshold=0.1)\n", + " inverted = invert_image(sharpened)\n", + " return gray, sharpened, inverted\n", + "\n", + "\n", + "# use unsharp mask algorithm from opencv\n", + "# https://docs.opencv.org/4.x/d1/d10/classcv_1_1MatExpr.html#details\n", + "def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):\n", + " \"\"\"Return a sharpened version of the image, using an unsharp mask.\n", + " Amount: 1 is neutral, higher values result in shaprer images. threshold is the value below which the difference between blurred and original image gets discarded.\"\"\"\n", + " blurred = cv2.GaussianBlur(image, kernel_size, sigma)\n", + " sharpened = float(amount + 1) * image - float(amount) * blurred\n", + " sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))\n", + " sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))\n", + " sharpened = sharpened.round().astype(np.uint8)\n", + " if threshold > 0:\n", + " low_contrast_mask = np.absolute(image - blurred) < threshold\n", + " np.copyto(sharpened, image, where=low_contrast_mask)\n", + " return sharpened\n", + "\n", + "\n", + "def invert_image(image):\n", + " return cv2.bitwise_not(image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a483868b", + "metadata": {}, + "outputs": [], + "source": [ + "grey_image = []\n", + "for i in mysubfiles:\n", + " grey_image.append(preprocess(i))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08ed750d", + "metadata": {}, + "outputs": [], + "source": [ + "for image in grey_image:\n", + " # disable default colormap in imshow\n", + " plt.imshow(image[0], cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.imshow(image[1], cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "71ac2229", + "metadata": {}, + "source": [ + "mabe further preprocess in cropping out text regions..?" 
+ ] + }, + { + "cell_type": "markdown", + "id": "7786d09c", + "metadata": {}, + "source": [ + "# Try out different libraries\n", + "## The standard go-to tool that is slightly complicated: pytesseract\n", + "Install tesseract and the language libraries:\n", + "```\n", + "sudo apt install tesseract-ocr \n", + "sudo apt install tesseract-ocr-all \n", + "sudo apt install imagemagick \n", + "``` " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d69504c", + "metadata": {}, + "outputs": [], + "source": [ + "from pytesseract import pytesseract\n", + "\n", + "pytesseract.tesseract_cmd = r\"tesseract\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b27c98c-b437-4c8b-8844-96d8718eea49", + "metadata": {}, + "outputs": [], + "source": [ + "! pip install pytesseract" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "529de8d8", + "metadata": {}, + "outputs": [], + "source": [ + "myimage = grey_image[1]\n", + "plt.imshow(myimage[0], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "plt.imshow(myimage[1], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "plt.imshow(myimage[2], cmap=\"gray\", vmin=0, vmax=255)\n", + "plt.show()\n", + "\n", + "text = pytesseract.image_to_string(myimage[0])\n", + "print(text)\n", + "text = pytesseract.image_to_string(myimage[1])\n", + "print(text)\n", + "text = pytesseract.image_to_string(myimage[2])\n", + "print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "e25dd39e", + "metadata": {}, + "source": [ + "Here we would probably need to compare the extraction from the different preprocessed images and merge the results into a final text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36495f3f", + "metadata": {}, + "outputs": [], + "source": [ + "for image in mysubfiles:\n", + " # Loading image using OpenCV\n", + " img = cv2.imread(image)\n", + "\n", + " # Preprocessing image\n", + " # Converting to grayscale\n", + " gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", + "\n", + " # creating a binary image by selecting a proper threshold\n", + " binary_image = cv2.threshold(\n", + " gray_image, 130, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU\n", + " )[1]\n", + "\n", + " # Inverting the image\n", + " inverted_bin = cv2.bitwise_not(binary_image)\n", + "\n", + " # Some noise reduction\n", + " kernel = np.ones((2, 2), np.uint8)\n", + " processed_img = cv2.erode(inverted_bin, kernel, iterations=1)\n", + " processed_img = cv2.dilate(processed_img, kernel, iterations=1)\n", + "\n", + " # Applying image_to_string method\n", + " text = pytesseract.image_to_string(processed_img)\n", + " plt.imshow(processed_img, cmap=\"gray\", vmin=0, vmax=255)\n", + " plt.show()\n", + " print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "d6532019", + "metadata": {}, + "source": [ + "## keras-ocr\n", + "Not sure how to create an image object without a url.\n", + "https://keras-ocr.readthedocs.io/en/latest/examples/using_pretrained_models.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44e38871", + "metadata": {}, + "outputs": [], + "source": [ + "import keras_ocr\n", + "\n", + "pipeline = keras_ocr.pipeline.Pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bb55068-ddd4-4b90-ae94-90181980d3c0", + "metadata": {}, + "outputs": [], + "source": [ + "! 
pip install keras-ocr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0002f2c4", + "metadata": {}, + "outputs": [], + "source": [ + "images = [\n", + " keras_ocr.tools.read(url)\n", + " for url in [\n", + " \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-1.jpg\",\n", + " \"https://storage.googleapis.com/gcptutorials.com/examples/keras-ocr-img-2.png\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1567dc85", + "metadata": {}, + "outputs": [], + "source": [ + "prediction_groups = pipeline.recognize(images)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb1ca152", + "metadata": {}, + "outputs": [], + "source": [ + "predicted_image_1 = prediction_groups[0]\n", + "for text, box in predicted_image_1:\n", + " print(text)" + ] + }, + { + "cell_type": "markdown", + "id": "7b8b929f", + "metadata": {}, + "source": [ + "## google cloud vision API\n", + "First 1000 images per month are free." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c6ecc88", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\n", + " \"GOOGLE_APPLICATION_CREDENTIALS\"\n", + "] = \"/home/inga/projects/misinformation-project/misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n", + "images = mysubfiles[1:5]\n", + "misinformation.explore_analysis(images, identify=\"text-on-image\")" + ] + }, + { + "cell_type": "markdown", + "id": "d54407ad", + "metadata": {}, + "source": [ + "## MS Azure\n", + "https://docs.microsoft.com/en-us/azure/search/cognitive-search-concept-image-scenarios" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "365c78b1-7ff4-4213-86fa-6a0a2d05198f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 5a39586..2ac6cc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "ipywidgets ==8.0.0rc1", "pooch", "retina-face", + "google-cloud-vision", ] [project.scripts] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..50fc51c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +deepface +ipywidgets==8.0.0rc1 +pooch +retina-face +opencv-python +matplotlib +numpy +keras-ocr +tensorflow +google-cloud-vision \ No newline at end of file
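Usage sketch (not part of the patch): a minimal example of how the new entry points added above could be exercised together, assuming image files live in a local folder such as the ignored data/ directory and that a valid Google Cloud service-account key is available; the limit value and both paths below are placeholders.

# Minimal usage sketch for the new analysis entry points; assumptions noted above.
import os

import misinformation
from misinformation.text import detect_text

# Collect image files; find_files is re-exported from misinformation.utils.
images = misinformation.find_files(limit=10)

# Interactive exploration inside Jupyter, with identify="faces" or "text-on-image".
misinformation.explore_analysis(images, identify="faces")

# Non-interactive text extraction through the Google Cloud Vision API.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "path/to/credentials.json"  # placeholder
print(detect_text(images[0])["text"])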