зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 13:06:04 +02:00
Switching ipywidgets to dash (#73)
* added dash app instead of ipywidgets * added pre-commit * removed ipywidgets function * added dash to dependencies * exempted dash server from tests * put das explorer app in new class * added variable for img path in tests * added new explorer class to all notebooks * added identif class variable * removed unused argument in test_display * more cleanup, more text analysis in notebook * update NER test * update with more comments and fix dash calls * update text description multimodal * update objects description --------- Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
Этот коммит содержится в:
родитель
3f2f9bbde3
Коммит
530c8525a8
@ -1,12 +1,17 @@
|
||||
import ipywidgets
|
||||
from IPython.display import display
|
||||
|
||||
import ammico.faces as faces
|
||||
import ammico.text as text
|
||||
import ammico.objects as objects
|
||||
from ammico.utils import is_interactive
|
||||
|
||||
import ammico.summary as summary
|
||||
|
||||
import dash_renderjson
|
||||
from dash import html, Input, Output, dcc, State
|
||||
import jupyter_dash
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class JSONContainer:
|
||||
"""Expose a Python dictionary as a JSON document in JupyterLab
|
||||
@ -22,54 +27,163 @@ class JSONContainer:
|
||||
return self._data
|
||||
|
||||
|
||||
def explore_analysis(mydict, identify="faces"):
|
||||
# dictionary mapping the type of analysis to be explored
|
||||
identify_dict = {
|
||||
"faces": faces.EmotionDetector,
|
||||
"text-on-image": text.TextDetector,
|
||||
"objects": objects.ObjectDetector,
|
||||
"summary": summary.SummaryDetector,
|
||||
}
|
||||
# create a list containing the image ids for the widget
|
||||
# image_paths = [mydict[key]["filename"] for key in mydict.keys()]
|
||||
image_ids = [key for key in mydict.keys()]
|
||||
# Create an image selector widget
|
||||
image_select = ipywidgets.Select(
|
||||
options=image_ids, layout=ipywidgets.Layout(width="20%"), rows=20
|
||||
)
|
||||
class AnalysisExplorer:
|
||||
def __init__(self, mydict, identify="faces") -> None:
|
||||
self.app = jupyter_dash.JupyterDash(__name__)
|
||||
self.mydict = mydict
|
||||
self.identify = identify
|
||||
self.theme = {
|
||||
"scheme": "monokai",
|
||||
"author": "wimer hazenberg (http://www.monokai.nl)",
|
||||
"base00": "#272822",
|
||||
"base01": "#383830",
|
||||
"base02": "#49483e",
|
||||
"base03": "#75715e",
|
||||
"base04": "#a59f85",
|
||||
"base05": "#f8f8f2",
|
||||
"base06": "#f5f4f1",
|
||||
"base07": "#f9f8f5",
|
||||
"base08": "#f92672",
|
||||
"base09": "#fd971f",
|
||||
"base0A": "#f4bf75",
|
||||
"base0B": "#a6e22e",
|
||||
"base0C": "#a1efe4",
|
||||
"base0D": "#66d9ef",
|
||||
"base0E": "#ae81ff",
|
||||
"base0F": "#cc6633",
|
||||
}
|
||||
|
||||
# Set up the facial recognition output widget
|
||||
output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%"))
|
||||
|
||||
# Set up the image selection and display widget
|
||||
image_widget = ipywidgets.Box(
|
||||
children=[],
|
||||
layout=ipywidgets.Layout(width="50%"),
|
||||
)
|
||||
|
||||
# Register the tab switch logic
|
||||
def switch(_):
|
||||
# Clear existing output
|
||||
image_widget.children = ()
|
||||
output.clear_output()
|
||||
|
||||
# Create the new content
|
||||
image_widget.children = (
|
||||
ipywidgets.Image.from_file(mydict[image_select.value]["filename"]),
|
||||
# setup the layout
|
||||
app_layout = html.Div(
|
||||
[
|
||||
# top
|
||||
html.Div(
|
||||
["Identify: ", identify, self._top_file_explorer(mydict)],
|
||||
id="Div_top",
|
||||
style={
|
||||
"width": "30%",
|
||||
# "display": "inline-block",
|
||||
},
|
||||
),
|
||||
# middle
|
||||
html.Div(
|
||||
[self._middle_picture_frame()],
|
||||
id="Div_middle",
|
||||
style={
|
||||
"width": "60%",
|
||||
"display": "inline-block",
|
||||
"verticalAlign": "top",
|
||||
},
|
||||
),
|
||||
# right
|
||||
html.Div(
|
||||
[self._right_output_json()],
|
||||
id="Div_right",
|
||||
style={
|
||||
"width": "30%",
|
||||
"display": "inline-block",
|
||||
"verticalAlign": "top",
|
||||
},
|
||||
),
|
||||
],
|
||||
style={"width": "80%", "display": "inline-block"},
|
||||
)
|
||||
self.app.layout = app_layout
|
||||
# add callbacks to app
|
||||
self.app.callback(
|
||||
Output("img_middle_picture_id", "src"),
|
||||
Input("left_select_id", "value"),
|
||||
prevent_initial_call=True,
|
||||
)(self.update_picture)
|
||||
|
||||
# This output widget absorbes print statements that are messing with
|
||||
# the widget output and cannot be disabled through the API.
|
||||
with faces.NocatchOutput():
|
||||
mydict[image_select.value] = identify_dict[identify](
|
||||
mydict[image_select.value]
|
||||
).analyse_image()
|
||||
with output:
|
||||
display(JSONContainer(mydict[image_select.value]))
|
||||
self.app.callback(
|
||||
Output("right_json_viewer", "data"),
|
||||
Input("img_middle_picture_id", "src"),
|
||||
State("left_select_id", "options"),
|
||||
State("left_select_id", "value"),
|
||||
prevent_initial_call=True,
|
||||
)(self._right_output_analysis)
|
||||
|
||||
# Register the handler and trigger it immediately
|
||||
image_select.observe(switch, names=("value",), type="change")
|
||||
switch(None)
|
||||
# I split the different sections into subfunctions for better clarity
|
||||
def _top_file_explorer(self, mydict):
    """Build the dropdown that selects which file is to be analyzed.

    Args:
        mydict: Mapping of image id to a sub-dictionary that contains at
            least a "filename" entry.

    Returns:
        An ``html.Div`` wrapping a single ``dcc.Dropdown`` whose component
        id is "left_select_id".
    """
    # The options dictionary is keyed by file path and holds the image id
    # as its value; _right_output_analysis uses it for the reverse lookup.
    file_to_image_id = {
        entry["filename"]: image_id for image_id, entry in mydict.items()
    }
    file_dropdown = dcc.Dropdown(options=file_to_image_id, id="left_select_id")
    return html.Div([file_dropdown])
|
||||
|
||||
# Show the combined widget
|
||||
return ipywidgets.HBox([image_select, image_widget, output])
|
||||
def _middle_picture_frame(self):
    """Create the container that holds the currently selected image.

    Returns:
        An ``html.Div`` wrapping an ``html.Img`` with the component id
        "img_middle_picture_id"; the image is displayed at 80% width.
    """
    picture = html.Img(id="img_middle_picture_id", style={"width": "80%"})
    return html.Div([picture])
|
||||
|
||||
def _right_output_json(self):
    """Provide the JSON viewer that shows the analysis output.

    Returns:
        An ``html.Div`` containing a ``dcc.Loading`` wrapper (id
        "loading-2") around the ``DashRenderjson`` viewer with the
        component id "right_json_viewer".
    """
    # The viewer starts out empty and is styled with the inverted
    # colour theme stored on the instance.
    json_viewer = dash_renderjson.DashRenderjson(
        id="right_json_viewer",
        data={},
        max_depth=-1,
        theme=self.theme,
        invert_theme=True,
    )
    loading_wrapper = dcc.Loading(
        id="loading-2",
        children=[html.Div([json_viewer])],
        type="circle",
    )
    return html.Div([loading_wrapper])
|
||||
|
||||
def run_server(self, port=8050):
    """Start the Dash app inline in the current interactive session.

    Args:
        port: Port passed on to the Dash server (default 8050).

    Raises:
        EnvironmentError: If ``is_interactive()`` reports that the code is
            not running in an interactive environment.
    """
    if not is_interactive():
        # Fixed the garbled wording of the original message
        # ("in interactive an interactive environment").
        raise EnvironmentError(
            "Dash server should only be called in an interactive environment like jupyter notebooks."
        )

    self.app.run_server(debug=True, mode="inline", port=port)
|
||||
|
||||
# Dash callbacks
|
||||
def update_picture(self, img_path):
    """Dash callback: open the image that belongs to the selected path.

    Args:
        img_path: File path of the image to open, or None.

    Returns:
        The image returned by ``Image.open(img_path)``, or None when
        ``img_path`` is None.
    """
    # Nothing selected: there is no picture to show.
    if img_path is None:
        return None
    return Image.open(img_path)
|
||||
|
||||
def _right_output_analysis(self, image, all_options, current_value):
|
||||
# calls the analysis function and returns the output
|
||||
identify_dict = {
|
||||
"faces": faces.EmotionDetector,
|
||||
"text-on-image": text.TextDetector,
|
||||
"objects": objects.ObjectDetector,
|
||||
"summary": summary.SummaryDetector,
|
||||
}
|
||||
# get image ID from dropdown value, which is the filepath.
|
||||
image_id = all_options[current_value]
|
||||
|
||||
identify_function = identify_dict[self.identify]
|
||||
|
||||
self.mydict[image_id] = identify_function(self.mydict[image_id]).analyse_image()
|
||||
return self.mydict[image_id]
|
||||
|
||||
@ -1,11 +1,10 @@
|
||||
import json
|
||||
import ammico.display as ammico_display
|
||||
import pytest
|
||||
|
||||
|
||||
def test_explore_analysis_faces(get_path):
|
||||
mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
|
||||
temp = ammico_display.explore_analysis(mydict, identify="faces") # noqa
|
||||
temp = None # noqa
|
||||
with open(get_path + "example_faces.json", "r") as file:
|
||||
outs = json.load(file)
|
||||
mydict["IMG_2746"].pop("filename", None)
|
||||
@ -17,8 +16,6 @@ def test_explore_analysis_faces(get_path):
|
||||
|
||||
def test_explore_analysis_objects(get_path):
|
||||
mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
|
||||
temp = ammico_display.explore_analysis(mydict, identify="objects") # noqa
|
||||
temp = None # noqa
|
||||
with open(get_path + "example_analysis_objects.json", "r") as file:
|
||||
outs = json.load(file)
|
||||
mydict["IMG_2809"].pop("filename", None)
|
||||
@ -26,3 +23,36 @@ def test_explore_analysis_objects(get_path):
|
||||
sub_dict = mydict[im_key]
|
||||
for key in sub_dict.keys():
|
||||
assert sub_dict[key] == outs[key]
|
||||
|
||||
|
||||
def test_AnalysisExplorer(get_path):
    """Exercise the AnalysisExplorer callbacks and the run_server guard.

    One explorer is created for "faces" and one for "objects"; each is
    driven with its own test image.
    """
    first_image = get_path + "IMG_2809.png"
    second_image = get_path + "IMG_2746.png"

    mydict = {
        "IMG_2809": {"filename": first_image},
        "IMG_2746": {"filename": second_image},
    }

    # Same shape as the dropdown options: file path -> image id.
    all_options_dict = {
        first_image: "IMG_2809",
        second_image: "IMG_2746",
    }

    faces_explorer = ammico_display.AnalysisExplorer(mydict, identify="faces")
    objects_explorer = ammico_display.AnalysisExplorer(mydict, identify="objects")
    explorer_runs = [
        (faces_explorer, first_image),
        (objects_explorer, second_image),
    ]

    for explorer, image_path in explorer_runs:
        explorer.update_picture(image_path)

    assert objects_explorer.update_picture(None) is None

    for explorer, image_path in explorer_runs:
        explorer._right_output_analysis(None, all_options_dict, image_path)

    # The test run is not an interactive session, so the server must refuse
    # to start.
    for explorer, _ in explorer_runs:
        with pytest.raises(EnvironmentError):
            explorer.run_server(port=8050)
|
||||
|
||||
@ -124,8 +124,8 @@ def test_text_summary(get_path):
|
||||
reference_text = file.read()
|
||||
mydict["text_english"] = reference_text
|
||||
test_obj.text_summary()
|
||||
reference_summary = " I’m sorry, but I don’t want to be an emperor. That’s not my business. I should like to help everyone - if possible - Jew, Gentile - black man - white . We all want to help one another. In this world there is room for everyone. The way of life can be free and beautiful, but we have lost the way ."
|
||||
assert mydict["summary_text"] == reference_summary
|
||||
reference_summary = " I’m sorry, but I don’t want to be an emperor"
|
||||
assert mydict["text_summary"] == reference_summary
|
||||
|
||||
|
||||
def test_text_sentiment_transformers():
|
||||
@ -142,8 +142,8 @@ def test_text_ner():
|
||||
test_obj = tt.TextDetector(mydict, analyse_text=True)
|
||||
mydict["text_english"] = "Bill Gates was born in Seattle."
|
||||
test_obj.text_ner()
|
||||
assert mydict["entity"] == ["Bill", "Gates", "Seattle"]
|
||||
assert mydict["entity_type"] == ["I-PER", "I-PER", "I-LOC"]
|
||||
assert mydict["entity"] == ["Bill Gates", "Seattle"]
|
||||
assert mydict["entity_type"] == ["PER", "LOC"]
|
||||
|
||||
|
||||
def test_PostprocessText(set_testdict, get_path):
|
||||
|
||||
@ -53,6 +53,9 @@ class TextDetector(utils.AnalysisMethod):
|
||||
self.clean_text()
|
||||
self.correct_spelling()
|
||||
self.sentiment_analysis()
|
||||
self.text_summary()
|
||||
self.text_sentiment_transformers()
|
||||
self.text_ner()
|
||||
return self.subdict
|
||||
|
||||
def get_text_from_image(self):
|
||||
@ -124,8 +127,15 @@ class TextDetector(utils.AnalysisMethod):
|
||||
# use the current default model - 03/2023
|
||||
model_name = "sshleifer/distilbart-cnn-12-6"
|
||||
model_revision = "a4f8f3e"
|
||||
pipe = pipeline("summarization", model=model_name, revision=model_revision)
|
||||
self.subdict.update(pipe(self.subdict["text_english"])[0])
|
||||
pipe = pipeline(
|
||||
"summarization",
|
||||
model=model_name,
|
||||
revision=model_revision,
|
||||
min_length=5,
|
||||
max_length=20,
|
||||
)
|
||||
summary = pipe(self.subdict["text_english"])
|
||||
self.subdict["text_summary"] = summary[0]["summary_text"]
|
||||
|
||||
def text_sentiment_transformers(self):
|
||||
# use the transformers pipeline for text classification
|
||||
@ -145,14 +155,18 @@ class TextDetector(utils.AnalysisMethod):
|
||||
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
|
||||
model_revision = "f2482bf"
|
||||
pipe = pipeline(
|
||||
"token-classification", model=model_name, revision=model_revision
|
||||
"token-classification",
|
||||
model=model_name,
|
||||
revision=model_revision,
|
||||
aggregation_strategy="simple",
|
||||
)
|
||||
result = pipe(self.subdict["text_english"])
|
||||
# self.subdict["entity"] = result
|
||||
self.subdict["entity"] = []
|
||||
self.subdict["entity_type"] = []
|
||||
for entity in result:
|
||||
self.subdict["entity"].append(entity["word"])
|
||||
self.subdict["entity_type"].append(entity["entity"])
|
||||
self.subdict["entity_type"].append(entity["entity_group"])
|
||||
|
||||
|
||||
class PostprocessText:
|
||||
|
||||
@ -95,3 +95,9 @@ def append_data_to_dict(mydict: dict) -> dict:
|
||||
def dump_df(mydict: dict) -> DataFrame:
    """Utility to dump the dictionary into a dataframe.

    Args:
        mydict: Dictionary handed to ``DataFrame.from_dict``.

    Returns:
        The DataFrame built from ``mydict``.
    """
    frame = DataFrame.from_dict(mydict)
    return frame
|
||||
|
||||
|
||||
def is_interactive():
    """Report whether the ``__main__`` module lacks a ``__file__`` attribute.

    The missing attribute is used as the indicator for an interactive
    session.

    Returns:
        True if ``__main__`` has no ``__file__`` attribute, False otherwise.
    """
    import __main__ as entry_module

    started_from_file = hasattr(entry_module, "__file__")
    return not started_from_file
|
||||
|
||||
1
notebooks/.~lock.data_out.csv#
сгенерированный
Обычный файл
1
notebooks/.~lock.data_out.csv#
сгенерированный
Обычный файл
@ -0,0 +1 @@
|
||||
,iulusoy,ssc08,03.05.2023 12:16,file:///home/iulusoy/.config/libreoffice/4;
|
||||
11
notebooks/data_out.csv
сгенерированный
Обычный файл
11
notebooks/data_out.csv
сгенерированный
Обычный файл
@ -0,0 +1,11 @@
|
||||
,filename,text,text_language,text_english,text_clean,text_english_correct,polarity,subjectivity,summary_text,sentiment,sentiment_score,entity,entity_type
|
||||
0,../data/images-little-text/102784S_eng.png,"0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2",en,"0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2","/ THE WHITE HOUSE So , ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices AMERICAN LIVE TES AVAILABLE ( Bleach & Alcohol ) work to kill the virus C -","0:04 / 1:08 89 89 THE WHITE HOUSE To, ask Will a question that probably some of you are CORONAVIRUS RESPONSE West Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Each &hopropyl Alcohol) work to kill the virus C-SPAN2",0.30727272727272725,0.24000000000000005," So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices . The White House asks Bill to explain how to kill the virus . C-SPAN2: Ask Bill about what you know about the White House and how you can help you kill a virus .",NEGATIVE,0.9956117272377014,"['Bill', 'CO', '##RO', '##NA', 'AM', '##ER', '##IC', '##AN', 'C', 'SP']","['I-PER', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC']"
|
||||
1,../data/images-little-text/102174_eng.png,"COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may...",en,"COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may...","COVID-19 BREAKING PM ADDRESSES NATION ALL - INDIA LOCKDOWN INCREASED TILL MAY COURTESY : DD INDIA TODAY PM MODI , PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may ...","COVID-19 BREAKING of ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: of INDIA TODAY 6:10 of NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS of announces to shut down everything till with May All India lock down increased till th may...",-0.15555555555555559,0.2888888888888889, All India lock down increased till 4th may... PM announces to shut down everything till 04th May . All-INDIA LOCK DOWN DATE DATES increased to 4th May. All-India LOCKDOWN DATE increased to 04th MAY. COVID-19 BREAKING PM ADDRESSES NATION All-India Locked Down .,NEGATIVE,0.9822246432304382,"['IN', '##DI', '##A', 'IN', '##DI', '##A', 'N', '##AR', '##EN', '##DR', '##A', 'M', 'IN', '##DI', '##A', 'All', 'India']","['I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-ORG', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC']"
|
||||
2,../data/images-little-text/110117_tur.png,ANS Flaş Gelişme! Koronadan Ölenlerin Yakılmasına Karar Verildi,tr,ANS Flash Progress! Those Who Died From Corona Decided to Burn,ANS Flash Progress ! Those Who Died From Corona Decided to Burn,ANS Clash Progress! Those Who Died From Corona Decided to Turn,0.0,0.0, Those who died from Corona Decided to Burn decided to burn themselves . Those who perished from Corona decided to set themselves alight in the ashes . Corona was the largest fire in the history of the U.S. Department of National Fire Department . Corona is the largest inferno in the country and was the first to burn itself .,POSITIVE,0.9344154596328735,"['Co', '##rona']","['I-MISC', 'I-MISC']"
|
||||
3,../data/images-little-text/105821_mar.png,भारत के प्रधानमंत्री मोदी जी पहली बार WHO का चेयरमैन बने.. बधाइयां रुकनी नहीं चाहिए.. INDIA'S BEST LEADER,hi,Prime Minister of India Modi ji for the first time Became the chairman of WHO.. Congratulations Rukni Do not want.. INDIA'S BEST LEADER,Prime Minister of India Modi ji for the first time Became the chairman of WHO .. Congratulations Do not want .. INDIA 'S BEST LEADER,Crime Minister of India Lodi i for the first time Became the chairman of WHO.. Congratulations Rukni To not want.. INDIA'S BEST LEADER,0.625,0.31666666666666665, Prime Minister of India Modi ji for the first time Became the chairman of WHO . Congratulations Rukni Do not want.. Do not . Do you want.. INDIA'S BEST LEADER? Do not need to want to know what you want to do . Do not know what it wants to do with your best friend .,POSITIVE,0.994724452495575,"['India', 'Mo', '##di', 'WHO', 'R', '##uk', '##ni', 'IN', '##DI', '##A']","['I-LOC', 'I-PER', 'I-PER', 'I-ORG', 'I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-LOC']"
|
||||
4,../data/images-little-text/102141_3_eng.png,0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,en,0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,0 -5 -10 Dow Jones Industrial Average -20 % S&P Feb. Source : FactSet MARCH Ever SOURCE : BLOOMBERG MARCH Feb. NASDAQ MARCH Feb. Mand MARCH DOW MARCH : % MARCH FORTUNE,0 -5 -10 -15 Now Bones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Web. 18 Source: FactSet MARCH 10 Over SOURCE: BLOOMBERG MARCH 11 Web. 20 NASDAQ MARCH 12 Web. 24 And MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,-0.15,0.39999999999999997, Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: -12.3% MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 MarCH 13 DOW MARCH .,NEGATIVE,0.9472292065620422,"['Dow', 'Jones', 'Industrial', 'Average', 'S', '&', 'P', 'F', 'B', '##L', 'NAS', '##DA']","['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-PER', 'I-ORG', 'I-MISC', 'I-MISC']"
|
||||
5,../data/images-little-text/100605_mya.png,"Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia PO SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days till negative 3-If negative repeat second test after 24 hours ရောဂါကာကွယ်ဆေးကို ရှာတွေ့ ပါပြီလို ကျန်းမာရေးဝန်ကြီး သတင်းကောင်းပေးပြီ Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)",my,"Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days until negative 3-If negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)","Infection . Treatment of COVID-19 Upper Respiratory Tract Infection ( , runny cough without lung infiltrate positive PCR ) . Treatment of COVID-19 Pneumonia P.O COLLECTION : 1 - Combined nasopharyngeal oropharyngeal swab - If positive repeat every days until negative - If negative repeat second test after hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control ) practitioner Prepared by CDC MICC Team , version ( - )","Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervor, funny not cough without lung infiltrate positive PCR) 2. 
Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-Of positive repeat every 3 days until negative 3-Of negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)",0.0693181818181818,0.37386363636363634," The Minister of Health has given good news that a vaccine has been found . Airborne and contact isolation is recommended for contact isolation . CDC MICC Team, version 1 (28-02-2020) has prepared a vaccine for COVID-19 Pneumonia P.O. If positive repeat every 3 days until negative 3-If negative repeat every 2 days .",NEGATIVE,0.9936286807060242,"['CO', '##D', 'CO', '##D', 'Health', 'CD', '##C', 'MI', '##CC', 'Team']","['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']"
|
||||
6,../data/images-little-text/105169S_ell.png,Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group ΣΤΕΦΑΝΟΣ ΔΑΜΙΑΝΙΔΗΣ FOCUS FOCUS Ma 11 ΔΗΜΗΤΡΗΣ ΑΝΤΩΝΙΟΥ 0:08/18:49 ΙΑΤΡΟΣ ΜΕΤΑΛΛΑΞΗ ΣΕ ΣΠΕΡΜ... ΟΙ ΕΛΕΓΧΟΙ ΤΩΝ ΕΜΒΟΛΙΩΝ ΓΙΑ ΤΟΝ GOVID» «Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ ΜΕΤΑΛΛΑΞΕΙΣ ΣΤΟ DNA ΣΠΕΡΜΑΤΟΖΩΑΡΙΩΝ ΚΑΙ ΩΑΡΙΩΝ» Recorded live ΓΕΝΕΤΙΚΑ ΤΡΟΠΟΠΟΙΗΜΕΝΟΣ ΟΡΓΑΝΙΣΜΟΣ ΜΕ ΑΠΡΟΒΛΕΠΤΕΣ ΣΥΝΕΠΕΙΕΣ Ο ΙΟΣ ΠΟΥ ΕΙΣΕΡΧΕΤΑΙ ΣΤΟ ΑΝΘΡΩΠΙΝΟ ΣΩΜΑ ΜΕ ΤΟ ΕΜΒΟΛΙΟ,el,"THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS Ma 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION IN SPERM... THE CONTROLS OF VACCINES ABOUT GOVID"" ""THE COMPOSITION OF THE VACCINE CAN CAUSE MUTATIONS IN DNA SPERM AND OVA"" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE","THE VACCINE COMPOSITION MAY CAUSE NA Social Media ORM Group FOCUS FOCUS Ma PHYSICIAN MUTATION IN SPERM ... THE CONTROLS OF ABOUT "" "" THE COMPOSITION OF THE VACCINE CAN CAUSE IN DNA SPERM AND OVA "" Recorded live MODIFIED WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE","THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE of MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS A 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION of SPERM... THE CONTROLS of VACCINES ABOUT GOVID"" ""THE COMPOSITION of THE VACCINE CAN CAUSE MUTATIONS of DNA SPERM AND OVA"" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS of THE HUMAN BODY WITH THE VACCINE",0.06742424242424241,0.4166666666666667," ""THE COMPOSITION of the VACCINE CAN CAUSE MUTATIONS in DNA SPERM AND OVA"" ""The VACCine may CAuse PNOY MIOPE NA MPOKANESE METAMA"" ""VACCine can CAuse MUTATION in the DNA of the human body... 
The Controls of VACCINES about GOVID""",NEGATIVE,0.9872211813926697,"['MP', '##OK', 'OR', '##M', 'Group', 'ST', '##EF', '##AN', '##OS', 'D', '##AM', '##NI', '##DI', '##IM', '##CI', 'H', '##UM']","['I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-MISC', 'I-MISC', 'I-MISC']"
|
||||
7,../data/images-little-text/107454_sin.png,"න කොරෝනාව ලංකාවෙන්, He කර each onesian ජනපතිට හා අගමැතිට # වlima tha සී. 4 ပညာ ကြာပ ආඅපෙන් සුභ පැතුම් අවාධාරණ මුණ | o use laga lnya / 92k-to-tim",si,N Corona is from Sri Lanka. He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting | o use laga lnya / 92k-to-tim,N Corona is from Sri Lanka . He did each To the President and the Prime Minister # tha C. Wednesday Greetings from us Unreasonable meeting | o use laga / - to - tim,N Corona is from Ri Vanka. He did each onesian To the President and the Crime Minister # Saliva the C. 4 Wednesday Greetings from us Unreasonable meeting | o use lata onya / ask-to-tim,0.0,0.0, N Corona is from Sri Lanka . He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting . Corona is a member of the Sri Lankan National Democratic Party . He is a former President of Sri Lanka and Prime Minister of India .,NEGATIVE,0.9782203435897827,"['N', 'Co', '##rona', 'Sri', 'Lanka', 'Val']","['I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-PER']"
|
||||
8,../data/images-little-text/105822_mar.png,SH कोरोना से निर्णायक युद्ध... भारत कब तक वायरस मुक्त : प्रधानमंत्री की बड़ी बातें 1 करोड़ कोरोना मरीज़ों 6 का फ्री में इलाज किया गया LIVE BREAKING NEWS आयुष्मान भारत योजना से गरीबों को लाभ मिला- पीएम इंडिया टीवी SUN 11:24,hi,SH Decisive war with Corona... till when India is virus free: Prime Minister's big things 1 crore corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun 11:24,SH Decisive war with Corona ... till when India is virus free : Prime Minister 's big things crore corona patients treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun,of Decisive war with Corona... till when India is virus free: Crime Minister's big things 1 core corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Hart scheme - of India Iv Run 11:24,0.10727272727272727,0.56, The poor benefited from the Ayushman Bharat scheme - PM India Tv. SH Decisive war with Corona... till when India is virus free: Prime Minister's big things . 1 crore corona patients 6 treated for free; 6 treated free for free . PM India tv: The poor are among those who benefit from the scheme .,POSITIVE,0.969579815864563,"['Co', '##rona', 'India', 'A', '##yu', '##sh', '##man', 'B', '##hara', '##t', 'India', 'T', 'Sun']","['I-ORG', 'I-ORG', 'I-LOC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG']"
|
||||
9,../data/images-little-text/105840_ori.png,"NDTV India 57m 3 agr भूख-प्यास से बेहाल मां की स्टेशन पर ही मौत, जगाने की कोशिश करता रहा बच्चा NDTV INDIA संकट में प्रवासी मजटर भख-प्यास से बेहाल मां की ⠀",hi,"NDTV India 57m 3 agr Mother suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Migrant farmer in distress, mother suffering from hunger and thirst ","NDTV India agr Mother suffering from hunger and thirst died at the station itself , to wake up baby trying NDTV INDIA Migrant farmer in distress , mother suffering from hunger and thirst","NDTV India him 3 air Other suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Grant farmer in distress, mother suffering from hunger and thirst ",0.0,0.0," Mother suffering from hunger and thirst died at the station itself, to wake up baby trying . NDTV INDIA: Migrant farmer in distress in distress, mother in distress . Mother suffered from hunger, thirst and died at station itself . She was trying to wake her baby up from inside to wake him up .",NEGATIVE,0.9970040917396545,"['N', '##D', '##TV', 'India', 'N', '##D', '##TV', 'IN', '##DI', '##A']","['I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']"
|
||||
|
42
notebooks/facial_expressions.ipynb
сгенерированный
42
notebooks/facial_expressions.ipynb
сгенерированный
@ -9,12 +9,15 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "51f8888b-d1a3-4b85-a596-95c0993fa192",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebooks shows some preliminary work on detecting facial expressions with DeepFace. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
|
||||
"Facial expressions can be detected using [DeepFace](https://github.com/serengil/deepface) and [RetinaFace](https://github.com/serengil/retinaface).\n",
|
||||
"\n",
|
||||
"The first cell is only run on Google Colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
||||
"\n",
|
||||
"After that, we can import `ammico` and read in the files given a folder path."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -57,7 +60,7 @@
|
||||
"id": "a2bd2153",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:"
|
||||
"We select a subset of image files to try facial expression detection on, see the `limit` keyword. The `find_files` function finds image files within a given directory:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -75,24 +78,6 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e149bfe5-90b0-49b2-af3d-688e41aab019",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to fine tune the discovery of image files, you can provide more parameters:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f38bb8ed-1004-4e33-8ed6-793cb5869400",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"?mutils.find_files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "705e7328",
|
||||
@ -117,7 +102,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
|
||||
"Here, we display the face recognition results provided by the DeepFace library. Click on the tabs to see the results in the right sidebar:"
|
||||
"Here, we display the face recognition results provided by the DeepFace and RetinaFace libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -127,7 +112,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"faces\")"
|
||||
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"faces\")\n",
|
||||
"analysis_explorer.run_server(port = 8050)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -135,7 +121,7 @@
|
||||
"id": "6f974341",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Directly carry out the analysis and export the result into a csv: Analysis - "
|
||||
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -154,7 +140,7 @@
|
||||
"id": "174357b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Convert the dictionary of dictionarys into a dictionary with lists:"
|
||||
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -191,7 +177,7 @@
|
||||
"id": "579cd59f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Write the csv file:"
|
||||
"Write the csv file - here you should provide a file path and file name for the csv file to be written."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -201,7 +187,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv(\"drive/MyDrive/misinformation-data//data_out.csv\")"
|
||||
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -229,7 +215,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.5"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
81
notebooks/get-text-from-image.ipynb
сгенерированный
81
notebooks/get-text-from-image.ipynb
сгенерированный
@ -6,7 +6,23 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notebook for text extraction on image\n",
|
||||
"Inga Ulusoy, SSC, July 2022"
|
||||
"\n",
|
||||
"The text extraction and analysis is carried out using a variety of tools: \n",
|
||||
"\n",
|
||||
"1. Text extraction from the image using [google-cloud-vision](https://cloud.google.com/vision) \n",
|
||||
"1. Language detection of the extracted text using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/) \n",
|
||||
"1. Translation into English or other languages using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/) \n",
|
||||
"1. Cleaning of the text using [spacy](https://spacy.io/) \n",
|
||||
"1. Spell-check using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
|
||||
"1. Subjectivity analysis using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
|
||||
"1. Text summarization using [transformers](https://huggingface.co/docs/transformers/index) pipelines\n",
|
||||
"1. Sentiment analysis using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
|
||||
"1. Named entity recognition using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
|
||||
"1. Topic analysis using [BERTopic](https://github.com/MaartenGr/BERTopic) \n",
|
||||
"\n",
|
||||
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
||||
"\n",
|
||||
"After that, we can import `ammico` and read in the files given a folder path."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -45,6 +61,14 @@
|
||||
"from ammico import display as mdisplay"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fddba721",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We select a subset of image files to try the text extraction on, see the `limit` keyword. The `find_files` function finds image files within a given directory: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -60,6 +84,14 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a7dfe11",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -75,8 +107,9 @@
|
||||
"id": "7b8b929f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# google cloud vision API\n",
|
||||
"First 1000 images per month are free."
|
||||
"# Google cloud vision API\n",
|
||||
"\n",
|
||||
"For this you need an API key and have the app activated in your google console. The first 1000 images per month are free (July 2022)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -96,7 +129,9 @@
|
||||
"id": "0891b795-c7fe-454c-a45d-45fadf788142",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Inspect the elements per image"
|
||||
"## Inspect the elements per image\n",
|
||||
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
|
||||
"Here, we display the text extraction and translation results provided by the above libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -106,7 +141,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"text-on-image\")"
|
||||
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"text-on-image\")\n",
|
||||
"analysis_explorer.run_server(port=8054)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -114,7 +150,8 @@
|
||||
"id": "9c3e72b5-0e57-4019-b45e-3e36a74e7f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Or directly analyze for further processing"
|
||||
"## Or directly analyze for further processing\n",
|
||||
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded. Set the keyword `analyse_text` to `True` if you want the text to be analyzed (spell check, subjectivity, text summary, sentiment, NER)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -125,7 +162,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for key in mydict:\n",
|
||||
" print(key)\n",
|
||||
" mydict[key] = ammico.text.TextDetector(\n",
|
||||
" mydict[key], analyse_text=True\n",
|
||||
" ).analyse_image()"
|
||||
@ -136,7 +172,8 @@
|
||||
"id": "3c063eda",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Convert to dataframe and write csv"
|
||||
"## Convert to dataframe and write csv\n",
|
||||
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -150,6 +187,14 @@
|
||||
"df = mutils.dump_df(outdict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae182eb7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check the dataframe:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -157,10 +202,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# check the dataframe\n",
|
||||
"df.head(10)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eedf1e47",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Write the csv file - here you should provide a file path and file name for the csv file to be written."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -169,7 +221,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Write the csv\n",
|
||||
"df.to_csv(\"./data_out.csv\")"
|
||||
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -187,6 +239,11 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"BERTopic takes a list of strings as input. The more items in the list, the better for the topic modeling. If the below returns an error for `analyse_topic()`, the reason can be that your dataset is too small.\n",
|
||||
"\n",
|
||||
"You can pass which dataframe entry you would like to have analyzed. The default is `text_english`, but you could for example also select `text_summary` or `text_english_correct` setting the keyword `analyze_text` as so:\n",
|
||||
"\n",
|
||||
"`ammico.text.PostprocessText(mydict=mydict, analyze_text=\"text_summary\").analyse_topic()`\n",
|
||||
"\n",
|
||||
"### Option 1: Use the dictionary as obtained from the above analysis."
|
||||
]
|
||||
},
|
||||
@ -219,7 +276,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_file_path = \"data_out.csv\"\n",
|
||||
"input_file_path = \"/content/drive/MyDrive/misinformation-data/data_out.csv\"\n",
|
||||
"topic_model, topic_df, most_frequent_topics = ammico.text.PostprocessText(\n",
|
||||
" use_csv=True, csv_path=input_file_path\n",
|
||||
").analyse_topic(return_topics=10)"
|
||||
@ -327,7 +384,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.5"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
64
notebooks/image_summary.ipynb
сгенерированный
64
notebooks/image_summary.ipynb
сгенерированный
@ -8,11 +8,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebooks shows some preliminary work on Image Captioning and Visual question answering with lavis. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
|
||||
"This notebooks shows how to generate image captions and use the visual question answering with [LAVIS](https://github.com/salesforce/LAVIS). \n",
|
||||
"\n",
|
||||
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
||||
"\n",
|
||||
"After that, we can import `ammico` and read in the files given a folder path."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -51,13 +54,6 @@
|
||||
"import ammico.summary as sm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set an image path as input file path."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -96,7 +92,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here you can choose between two models: \"base\" or \"large\""
|
||||
"Here you can choose between two models: \"base\" or \"large\". This will generate the caption for each image and directly put the results in a dataframe. This dataframe can be exported as a csv file.\n",
|
||||
"\n",
|
||||
"The results are written into the columns `const_image_summary` - this will always be the same result (as always the same seed will be used). The column `3_non-deterministic summary` displays three different answers generated with different seeds, these are most likely different when you run the analysis again."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -178,7 +176,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv(\"./data_out.csv\")"
|
||||
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -202,7 +200,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
|
||||
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
|
||||
"analysis_explorer.run_server(port=8055)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -216,7 +215,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set the list of questions"
|
||||
"Set the list of questions as a list of strings:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -232,6 +231,31 @@
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Explore the analysis using the interface:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
|
||||
"analysis_explorer.run_server(port=8055)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Or directly analyze for further processing\n",
|
||||
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -242,20 +266,12 @@
|
||||
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(list_of_questions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Convert the dictionary of dictionarys into a dictionary with lists:"
|
||||
"## Convert to dataframe and write csv\n",
|
||||
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -283,7 +299,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df2.to_csv(\"./data_out2.csv\")"
|
||||
"df2.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out2.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
57
notebooks/multimodal_search.ipynb
сгенерированный
57
notebooks/multimodal_search.ipynb
сгенерированный
@ -9,12 +9,15 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9eeeb302-296e-48dc-86c7-254aa02f2b3a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebooks shows some preliminary work on Image Multimodal Search with lavis library. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
|
||||
"This notebooks shows how to carry out an image multimodal search with the [LAVIS](https://github.com/salesforce/LAVIS) library. \n",
|
||||
"\n",
|
||||
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
||||
"\n",
|
||||
"After that, we can import `ammico` and read in the files given a folder path."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -53,14 +56,6 @@
|
||||
"import ammico.multimodal_search as ms"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acf08b44-3ea6-44cd-926d-15c0fd9f39e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set an image path as input file path."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -70,8 +65,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Here you need to provide the path to your google drive folder\n",
|
||||
"# or local folder containing the images\n",
|
||||
"images = mutils.find_files(\n",
|
||||
" path=\"/content/drive/MyDrive/misinformation-data/\",\n",
|
||||
" limit=10,\n",
|
||||
@ -103,7 +96,13 @@
|
||||
"id": "66d6ede4-00bc-4aeb-9a36-e52d7de33fe5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can choose one of the following models: blip, blip2, albef, clip_base, clip_vitl14, clip_vitl14_336"
|
||||
"First you need to select a model. You can choose one of the following models: \n",
|
||||
"- [blip](https://github.com/salesforce/BLIP)\n",
|
||||
"- [blip2](https://huggingface.co/docs/transformers/main/model_doc/blip-2) \n",
|
||||
"- [albef](https://github.com/salesforce/ALBEF) \n",
|
||||
"- [clip_base](https://github.com/openai/CLIP/blob/main/model-card.md)\n",
|
||||
"- [clip_vitl14](https://github.com/mlfoundations/open_clip) \n",
|
||||
"- [clip_vitl14_336](https://github.com/mlfoundations/open_clip)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -123,6 +122,14 @@
|
||||
"# model_type = \"clip_vitl14_336\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "357828c9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To process the loaded images using the selected model, use the below code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -139,7 +146,11 @@
|
||||
" image_keys,\n",
|
||||
" image_names,\n",
|
||||
" features_image_stacked,\n",
|
||||
") = ms.MultimodalSearch.parsing_images(mydict, model_type, path_to_saved_tensors=\".\")"
|
||||
") = ms.MultimodalSearch.parsing_images(\n",
|
||||
" mydict, \n",
|
||||
" model_type, \n",
|
||||
" path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -147,7 +158,9 @@
|
||||
"id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The tensors of all images `features_image_stacked` was saved in `<Number_of_images>_<model_name>_saved_features_image.pt`. If you run it once for current model and current set of images you do not need to repeat it again. Instead you can load this features with the command:"
|
||||
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
|
||||
"\n",
|
||||
"This will save you a lot of time if you want to analyse same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -178,7 +191,7 @@
|
||||
"id": "309923c1-d6f8-4424-8fca-bde5f3a98b38",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here we already processed our image folder with 18 images with `clip_base` model. So you need just write the name `18_clip_base_saved_features_image.pt` of the saved file that consists of tensors of all images as a 3rd argument to the previous function. "
|
||||
"Here we already processed our image folder with 5 images and the `clip_base` model. So you need just to write the name `5_clip_base_saved_features_image.pt` of the saved file that consists of tensors of all images as keyword argument for `path_to_load_tensors`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -186,6 +199,8 @@
|
||||
"id": "162a52e8-6652-4897-b92e-645cab07aaef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Formulate your search queries\n",
|
||||
"\n",
|
||||
"Next, you need to form search queries. You can search either by image or by text. You can search for a single query, or you can search for several queries at once, the computational time should not be much different. The format of the queries is as follows:"
|
||||
]
|
||||
},
|
||||
@ -286,7 +301,9 @@
|
||||
"id": "0b750e9f-fe64-4028-9caf-52d7187462f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For even better results, a slightly different approach has been prepared that can improve search results. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images. This approach works only with text queries. Among the parameters you can choose 3 models: `\"blip_base\"`, `\"blip_large\"`, `\"blip2_coco\"`. If you get the Out of Memory error, try reducing the batch_size value (minimum = 1), which is the number of images being processed simultaneously. With the parameter `need_grad_cam = True/False` you can enable the calculation of the heat map of each image to be processed. Thus the `image_text_match_reordering` function calculates new similarity values and new ranks for each image. The resulting values are added to the general dictionary."
|
||||
"## Improve the search results\n",
|
||||
"\n",
|
||||
"For even better results, a slightly different approach has been prepared that can improve search results. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images. This approach works only with text queries. Among the parameters you can choose 3 models: `\"blip_base\"`, `\"blip_large\"`, `\"blip2_coco\"`. If you get an `Out of Memory` error, try reducing the batch_size value (minimum = 1), which is the number of images being processed simultaneously. With the parameter `need_grad_cam = True/False` you can enable the calculation of the heat map of each image to be processed. Thus the `image_text_match_reordering` function calculates new similarity values and new ranks for each image. The resulting values are added to the general dictionary."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -352,7 +369,7 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Save searhing results to csv"
|
||||
"## Save search results to csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -417,7 +434,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv(\"./data_out.csv\")"
|
||||
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -445,7 +462,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Двоичные данные
notebooks/obj_dect_cvlib/image.jpg
Двоичные данные
notebooks/obj_dect_cvlib/image.jpg
Двоичный файл не отображается.
|
До Ширина: | Высота: | Размер: 121 KiB |
Двоичные данные
notebooks/obj_dect_cvlib/image02.jpg
Двоичные данные
notebooks/obj_dect_cvlib/image02.jpg
Двоичный файл не отображается.
|
До Ширина: | Высота: | Размер: 792 KiB |
@ -1,103 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<span style =\" color : green ;font - weight : bold \">ImageAI for Object Detection</span>\n",
|
||||
"http://imageai.org/#features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A simple, high level, easy-to-use open source Computer Vision library for Python.\n",
|
||||
"\n",
|
||||
"It was developed with a focus on enabling easy and fast experimentation. Being able to go from an idea to prototype with least amount of delay is key to doing good research.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<p>cvlib detect_common_objects pretrained on coco dataset.</p>\n",
|
||||
"Underneath it uses YOLOv3 model trained on COCO dataset capable of detecting 80 common objects in context."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import cvlib as cv\n",
|
||||
"from cvlib.object_detection import draw_bbox"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"im = cv2.imread(\"image.jpg\")\n",
|
||||
"\n",
|
||||
"bbox, label, conf = cv.detect_common_objects(im)\n",
|
||||
"\n",
|
||||
"output_image = draw_bbox(im, bbox, label, conf)\n",
|
||||
"\n",
|
||||
"plt.imshow(output_image)\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"im = cv2.imread(\"image02.jpg\")\n",
|
||||
"\n",
|
||||
"bbox, label, conf = cv.detect_common_objects(im)\n",
|
||||
"\n",
|
||||
"output_image = draw_bbox(im, bbox, label, conf)\n",
|
||||
"\n",
|
||||
"plt.imshow(output_image)\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@ -1,80 +0,0 @@
|
||||
person
|
||||
bicycle
|
||||
car
|
||||
motorcycle
|
||||
airplane
|
||||
bus
|
||||
train
|
||||
truck
|
||||
boat
|
||||
traffic light
|
||||
fire hydrant
|
||||
stop sign
|
||||
parking meter
|
||||
bench
|
||||
bird
|
||||
cat
|
||||
dog
|
||||
horse
|
||||
sheep
|
||||
cow
|
||||
elephant
|
||||
bear
|
||||
zebra
|
||||
giraffe
|
||||
backpack
|
||||
umbrella
|
||||
handbag
|
||||
tie
|
||||
suitcase
|
||||
frisbee
|
||||
skis
|
||||
snowboard
|
||||
sports ball
|
||||
kite
|
||||
baseball bat
|
||||
baseball glove
|
||||
skateboard
|
||||
surfboard
|
||||
tennis racket
|
||||
bottle
|
||||
wine glass
|
||||
cup
|
||||
fork
|
||||
knife
|
||||
spoon
|
||||
bowl
|
||||
banana
|
||||
apple
|
||||
sandwich
|
||||
orange
|
||||
broccoli
|
||||
carrot
|
||||
hot dog
|
||||
pizza
|
||||
donut
|
||||
cake
|
||||
chair
|
||||
couch
|
||||
potted plant
|
||||
bed
|
||||
dining table
|
||||
toilet
|
||||
tv
|
||||
laptop
|
||||
mouse
|
||||
remote
|
||||
keyboard
|
||||
cell phone
|
||||
microwave
|
||||
oven
|
||||
toaster
|
||||
sink
|
||||
refrigerator
|
||||
book
|
||||
clock
|
||||
vase
|
||||
scissors
|
||||
teddy bear
|
||||
hair drier
|
||||
toothbrush
|
||||
Двоичные данные
notebooks/obj_dect_imageai/image.jpg
Двоичные данные
notebooks/obj_dect_imageai/image.jpg
Двоичный файл не отображается.
|
До Ширина: | Высота: | Размер: 121 KiB |
Двоичные данные
notebooks/obj_dect_imageai/imagenew.jpg
Двоичные данные
notebooks/obj_dect_imageai/imagenew.jpg
Двоичный файл не отображается.
|
До Ширина: | Высота: | Размер: 126 KiB |
@ -1,147 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<span style =\" color : green ;font - weight : bold \">ImageAI for Object Detection</span>\n",
|
||||
"http://imageai.org/#features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"ImageAI provides API to recognize 1000 different objects in a picture using pre-trained models that were trained on the ImageNet-1000 dataset. The model implementations provided are SqueezeNet, ResNet, InceptionV3 and DenseNet.\n",
|
||||
"</p>\n",
|
||||
"ImageAI provides API to detect, locate and identify 80 most common objects in everyday life in a picture using pre-trained models that were trained on the COCO Dataset. The model implementations provided include RetinaNet, YOLOv3 and TinyYOLOv3."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are 80 possible objects that you can detect with the\n",
|
||||
"ObjectDetection class, and they are as seen below.\n",
|
||||
"\n",
|
||||
" person, bicycle, car, motorcycle, airplane,\n",
|
||||
" bus, train, truck, boat, traffic light, fire hydrant, stop_sign,\n",
|
||||
" parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,\n",
|
||||
" giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,\n",
|
||||
" sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,\n",
|
||||
" bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,\n",
|
||||
" broccoli, carrot, hot dog, pizza, donot, cake, chair, couch, potted plant, bed,\n",
|
||||
" dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave,\n",
|
||||
" oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer,\n",
|
||||
" toothbrush."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<p>requirements:</p>\n",
|
||||
"<p>tensorflow==1.15.0</p>\n",
|
||||
"<p>numpy==1.19.5</p>\n",
|
||||
"<p>scipy==1.4.1</p>\n",
|
||||
"<p>keras==2.1.0</p>\n",
|
||||
"<p>imageai==2.0.2</p>\n",
|
||||
"\n",
|
||||
"<p>Or update to newest version, see https://github.com/OlafenwaMoses/ImageAI</p>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Download the RetinaNet model file for object detection\n",
|
||||
"\n",
|
||||
"https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from imageai.Detection import ObjectDetection\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import skimage.io\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"execution_path = os.getcwd()\n",
|
||||
"\n",
|
||||
"detector = ObjectDetection()\n",
|
||||
"detector.setModelTypeAsRetinaNet()\n",
|
||||
"detector.setModelPath(os.path.join(execution_path, \"resnet50_coco_best_v2.0.1.h5\"))\n",
|
||||
"detector.loadModel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"detections = detector.detectObjectsFromImage(\n",
|
||||
" input_image=os.path.join(execution_path, \"image.jpg\"),\n",
|
||||
" output_image_path=os.path.join(execution_path, \"imagenew.jpg\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for eachObject in detections:\n",
|
||||
" print(eachObject[\"name\"], \" : \", eachObject[\"percentage_probability\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image = skimage.io.imread(\"image.jpg\")\n",
|
||||
"imagenew = skimage.io.imread(\"imagenew.jpg\")\n",
|
||||
"\n",
|
||||
"_, axis = plt.subplots(1, 2)\n",
|
||||
"axis[0].imshow(image, cmap=\"gray\")\n",
|
||||
"axis[1].imshow(imagenew, cmap=\"gray\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
66
notebooks/objects_expression.ipynb
сгенерированный
66
notebooks/objects_expression.ipynb
сгенерированный
@ -4,15 +4,18 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Objects Expression recognition"
|
||||
"# Objects recognition"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebooks shows some preliminary work on detecting objects expressions with cvlib. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
|
||||
"This notebooks shows how to detect objects quickly using [cvlib](https://github.com/arunponnusamy/cvlib) and the [YOLOv4](https://github.com/AlexeyAB/darknet) model. This library detects faces, people, and several inanimate objects; we currently have restricted the output to person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, cell phone.\n",
|
||||
"\n",
|
||||
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
||||
"\n",
|
||||
"After that, we can import `ammico` and read in the files given a folder path."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -83,7 +86,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Detect objects and directly write to csv"
|
||||
"## Detect objects and directly write to csv\n",
|
||||
"You can directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -142,7 +146,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.to_csv(\"./data_out.csv\")"
|
||||
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -151,7 +155,8 @@
|
||||
"source": [
|
||||
"## Manually inspect what was detected\n",
|
||||
"\n",
|
||||
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing."
|
||||
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can directly export a csv file in the step above.\n",
|
||||
"Here, we display the object detection results provided by the above library. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -160,51 +165,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"objects\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def localize_objects(path):\n",
|
||||
" \"\"\"Localize objects in the local image.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" path: The path to the local file.\n",
|
||||
" \"\"\"\n",
|
||||
" from google.cloud import vision\n",
|
||||
"\n",
|
||||
" client = vision.ImageAnnotatorClient()\n",
|
||||
"\n",
|
||||
" with open(path, \"rb\") as image_file:\n",
|
||||
" content = image_file.read()\n",
|
||||
" image = vision.Image(content=content)\n",
|
||||
"\n",
|
||||
" objects = client.object_localization(image=image).localized_object_annotations\n",
|
||||
"\n",
|
||||
" print(\"Number of objects found: {}\".format(len(objects)))\n",
|
||||
" for object_ in objects:\n",
|
||||
" print(\"\\n{} (confidence: {})\".format(object_.name, object_.score))\n",
|
||||
" print(\"Normalized bounding polygon vertices: \")\n",
|
||||
" for vertex in object_.bounding_poly.normalized_vertices:\n",
|
||||
" print(\" - ({}, {})\".format(vertex.x, vertex.y))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\n",
|
||||
" \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
|
||||
"] = \"../../misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n",
|
||||
"localize_objects(\"/home/iulusoy/Desktop/102141_2_eng.png\")"
|
||||
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"objects\")\n",
|
||||
"analysis_explorer.run_server(port=8056)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -231,7 +193,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.5"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@ -52,6 +52,9 @@ dependencies = [
|
||||
"google-cloud-vision",
|
||||
"setuptools",
|
||||
"opencv-contrib-python",
|
||||
"dash",
|
||||
"jupyter_dash",
|
||||
"dash_renderjson",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user