Mirror of https://github.com/ssciwr/AMMICO.git, synced 2025-10-29 13:06:04 +02:00

dict handling down to and from analysis routines (#19)

This commit is contained in:
parent 90c6087fdf
commit 995cfec923
.flake8:

@@ -1,6 +1,5 @@
 [flake8]
-ignore = E203, E266, E402, E501, W503, F403, F401, F841
+ignore = F401, E402, E501
 exclude = .git,__pycache__,.ipynb_checkpoints
 max-line-length = 90
 max-complexity = 18
-select = B,C,E,F,W,T4,B9
flake8-nb configuration:

@@ -1,6 +1,5 @@
 [flake8_nb]
-ignore = E203, E266, E402, E501, W503, F403, F401, F841
+ignore = F401, E402, E501
 exclude = .git,__pycache__,.ipynb_checkpoints
 max-line-length = 90
 max-complexity = 18
-select = B,C,E,F,W,T4,B9
@@ -17,15 +17,18 @@ class JSONContainer:
         return self._data


-def explore_analysis(image_paths, identify="faces"):
+def explore_analysis(mydict, identify="faces"):
     # dictionary mapping the type of analysis to be explored
     identify_dict = {
         "faces": faces.facial_expression_analysis,
         "text-on-image": text.detect_text,
     }
+    # create a list containing the image ids for the widget
+    # image_paths = [mydict[key]["filename"] for key in mydict.keys()]
+    image_ids = [key for key in mydict.keys()]
     # Create an image selector widget
     image_select = ipywidgets.Select(
-        options=image_paths, layout=ipywidgets.Layout(width="20%"), rows=20
+        options=image_ids, layout=ipywidgets.Layout(width="20%"), rows=20
     )

     # Set up the facial recognition output widget

@@ -44,14 +47,18 @@ def explore_analysis(image_paths, identify="faces"):
         output.clear_output()

         # Create the new content
-        image_widget.children = (ipywidgets.Image.from_file(image_select.value),)
+        image_widget.children = (
+            ipywidgets.Image.from_file(mydict[image_select.value]["filename"]),
+        )

         # This output widget absorbes print statements that are messing with
         # the widget output and cannot be disabled through the API.
         with faces.NocatchOutput():
-            analysis = identify_dict[identify](image_select.value)
+            mydict[image_select.value] = identify_dict[identify](
+                mydict[image_select.value]
+            )
         with output:
-            display(JSONContainer(analysis))
+            display(JSONContainer(mydict[image_select.value]))

     # Register the handler and trigger it immediately
     image_select.observe(switch, names=("value",), type="change")
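Note: the net effect of the two hunks above is that explore_analysis now takes the global image dictionary instead of a list of paths, keys the selector widget by image id, and writes each analysis result back into the per-image sub-dictionary. A minimal sketch of that contract (illustrative ids, paths, and stand-in analysis, not from the commit):

# Sketch only: every analysis routine now takes a per-image sub-dict
# and returns it enriched, so results accumulate across selections.
def fake_analysis(subdict):
    # hypothetical stand-in for faces.facial_expression_analysis / text.detect_text
    subdict["wears_mask"] = False
    return subdict

mydict = {"img01": {"filename": "data/img01.png"}}  # illustrative id and path
# what the widget's switch handler effectively does for the selected id:
mydict["img01"] = fake_analysis(mydict["img01"])
print(mydict["img01"])  # {'filename': 'data/img01.png', 'wears_mask': False}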
@@ -72,16 +72,21 @@ retinaface_model = DownloadResource(
 )


-def facial_expression_analysis(img_path):
-    result = {"filename": img_path}
+def facial_expression_analysis(subdict):

     # Find (multiple) faces in the image and cut them
     retinaface_model.get()
-    faces = RetinaFace.extract_faces(img_path)
+    faces = RetinaFace.extract_faces(subdict["filename"])

-    # If no faces are found, we return an empty dictionary
+    # If no faces are found, we return empty keys
     if len(faces) == 0:
-        return result
+        subdict["face"] = None
+        subdict["wears_mask"] = None
+        subdict["age"] = None
+        subdict["gender"] = None
+        subdict["race"] = None
+        subdict["emotion"] = None
+        return subdict

     # Sort the faces by sight to prioritize prominent faces
     faces = list(reversed(sorted(faces, key=lambda f: f.shape[0] * f.shape[1])))

@@ -120,9 +125,9 @@ def facial_expression_analysis(img_path):

     # We limit ourselves to three faces
     for i, face in enumerate(faces[:3]):
-        result[f"person{ i+1 }"] = analyze_single_face(face)
+        subdict[f"person{ i+1 }"] = analyze_single_face(face)

-    return result
+    return subdict


 def wears_mask(face):
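Note: the sub-dict now comes back with a predictable shape even when detection fails. A hedged usage sketch (hypothetical path; requires the retina-face model, and assumes the elided middle of the function fills the same keys when faces are found):

result = facial_expression_analysis({"filename": "data/img01.png"})  # hypothetical image
if result.get("face") is None:
    print("no face detected; analysis keys are present but set to None")
else:
    # up to three prominent faces, keyed person1..person3
    print([k for k in result if k.startswith("person")])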
@@ -2,9 +2,10 @@ from google.cloud import vision
 import io


-def detect_text(path):
+def detect_text(subdict):
     """Detects text in the file."""
+    path = subdict["filename"]
     client = vision.ImageAnnotatorClient()

     with io.open(path, "rb") as image_file:

@@ -14,13 +15,13 @@ def detect_text(path):

     response = client.text_detection(image=image)
     texts = response.text_annotations
-    result = {"text": []}
+    subdict = {"text": []}
     for text in texts:
-        result["text"].append(text.description)
+        subdict["text"].append(text.description)

     if response.error.message:
         raise Exception(
             "{}\nFor more info on error messages, check: "
             "https://cloud.google.com/apis/design/errors".format(response.error.message)
         )
-    return result
+    return subdict
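Note: detect_text reads the path from the incoming sub-dict but then rebinds subdict to a fresh {"text": []}, so the dictionary it returns carries only the detected text, not the original "filename" key. A hedged usage sketch (hypothetical paths; requires Google Cloud Vision credentials):

import os
# credentials must point at a service-account JSON for the Vision API
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"  # hypothetical path

result = detect_text({"filename": "data/img01.png"})  # hypothetical image
print(result)  # {"text": [full_annotation, word1, word2, ...]} -- no "filename" key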
@@ -1,5 +1,6 @@
 import glob
 import os
+from pandas import DataFrame
 import pooch


@@ -29,6 +30,17 @@ def misinformation_prefetch_models():
     res.get()


+class AnalysisMethod:
+    """Base class to be inherited by all analysis methods."""
+
+    def __init__(self) -> None:
+        # define keys that will be set by the analysis
+        self.mykeys = ["filename"]
+
+    def analyse_image(self):
+        None
+
+
 def find_files(path=None, pattern="*.png", recursive=True, limit=20):
     """Find image files on the file system

@@ -55,3 +67,41 @@ def find_files(path=None, pattern="*.png", recursive=True, limit=20):
     result = result[:limit]

     return result
+
+
+def initialize_dict(filelist: list) -> dict:
+    mydict = {}
+    for img_path in filelist:
+        id = img_path.split(".")[0].split("/")[-1]
+        mydict[id] = {"filename": img_path}
+    return mydict
+
+
+def append_data_to_dict(mydict: dict) -> dict:
+    """Append entries from list of dictionaries to keys in global dict."""
+
+    # first initialize empty list for each key that is present
+    outdict = {key: [] for key in list(mydict.values())[0].keys()}
+    # now append the values to each key in a list
+    for subdict in mydict.values():
+        for key in subdict.keys():
+            outdict[key].append(subdict[key])
+    # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]}
+    print(outdict)
+    return outdict
+
+
+def dump_df(mydict: dict) -> DataFrame:
+    """Utility to dump the dictionary into a dataframe."""
+    return DataFrame.from_dict(mydict)
+
+
+if __name__ == "__main__":
+    files = find_files(
+        path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
+    )
+    mydict = initialize_dict(files)
+    outdict = {}
+    outdict = append_data_to_dict(mydict)
+    df = dump_df(outdict)
+    print(df.head(10))
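Note: taken together, the new utilities define the round trip from file system to pandas DataFrame, which the __main__ block above exercises. A condensed sketch of the same flow (illustrative directory; a fake enrichment stands in for the real analysis routines):

files = find_files(path="data/", pattern="*.png")   # illustrative directory
mydict = initialize_dict(files)                     # {"img01": {"filename": "data/img01.png"}, ...}
for key in mydict:
    mydict[key]["has_text"] = False                 # stand-in for an analysis enriching each sub-dict
outdict = append_data_to_dict(mydict)               # {"filename": [...], "has_text": [...]}
df = dump_df(outdict)                               # one column per key, one row per image
print(df.head())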
@@ -41,7 +41,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "images = misinformation.find_files(limit=1000)"
+    "images = misinformation.find_files(\n",
+    "    path=\"/home/inga/projects/misinformation-project/misinformation/data/test_no_text\",\n",
+    "    limit=1000,\n",
+    ")"
    ]
   },
   {

@@ -64,7 +67,25 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d8067ad1-ef8a-4e91-bcc6-f8dbef771854",
+   "id": "705e7328",
+   "metadata": {},
+   "source": [
+    "We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b37c0c91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mydict = misinformation.utils.initialize_dict(images)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9372561",
    "metadata": {},
    "source": [
     "Next, we display the face recognition results provided by the DeepFace library. Click on the tabs to see the results in the right sidebar:"

@@ -77,7 +98,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "misinformation.explore_analysis(images, identify=\"faces\")"
+    "misinformation.explore_analysis(mydict, identify=\"faces\")"
    ]
   },
   {
@@ -64,6 +64,16 @@
     " display(Image(filename=i))"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b32409f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mydict = misinformation.utils.initialize_dict(mysubfiles)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "07b7a7a3",

@@ -106,7 +116,7 @@
    " \"\"\"Preprocess the image to enhance features for extraction.\"\"\"\n",
    " image = cv2.imread(filename)\n",
    " # preserve the original image\n",
-    " original = image.copy()\n",
+    " # original = image.copy()\n",
    " # Grayscale, Gaussian blur, Otsu's threshold\n",
    " gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
    " # sharpen contrast by first smoothing and then substracting the smoothed and thresholded version\n",

@@ -363,8 +373,7 @@
    "os.environ[\n",
    "    \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
    "] = \"/home/inga/projects/misinformation-project/misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n",
-    "images = mysubfiles[1:5]\n",
-    "misinformation.explore_analysis(images, identify=\"text-on-image\")"
+    "misinformation.explore_analysis(mydict, identify=\"text-on-image\")"
    ]
   },
   {
requirements-dev.txt (new file):

@@ -0,0 +1,11 @@
+deepface
+ipywidgets==8.0.0rc1
+pooch
+retina-face
+opencv-python
+matplotlib
+numpy
+keras-ocr
+tensorflow
+google-cloud-vision
+pytesseract
requirements.txt:

@@ -2,10 +2,4 @@ deepface
 ipywidgets==8.0.0rc1
 pooch
 retina-face
-opencv-python
-matplotlib
-numpy
-keras-ocr
-tensorflow
 google-cloud-vision
-pytesseract
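Note: requirements.txt is trimmed to the runtime dependencies (deepface, ipywidgets, pooch, retina-face, google-cloud-vision), while the new requirements-dev.txt is a superset that adds the tooling used in the notebooks (opencv-python, matplotlib, numpy, keras-ocr, tensorflow, pytesseract). A development environment can therefore be set up from the dev file alone, e.g. `pip install -r requirements-dev.txt`.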