* added dash app instead of ipywidgets

* added pre-commit

* removed ipywidgets function

* added dash to dependencies

* exempted dash server from tests

* put dash explorer app in new class

* added variable for img path in tests

* added new explorer class to all notebooks

* added identif class variable

* removed unused argument in test_display

* more cleanup, more text analysis in notebook

* update NER test

* update with more comments and fix dash calls

* update text description multimodal

* update objects description

---------

Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
Этот коммит содержится в:
GwydionJon 2023-05-05 09:12:38 +02:00 коммит произвёл GitHub
родитель 3f2f9bbde3
Коммит 530c8525a8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
20 изменённых файлов: 412 добавлений и 525 удалений

Просмотреть файл

@ -1,12 +1,17 @@
import ipywidgets
from IPython.display import display
import ammico.faces as faces
import ammico.text as text
import ammico.objects as objects
from ammico.utils import is_interactive
import ammico.summary as summary
import dash_renderjson
from dash import html, Input, Output, dcc, State
import jupyter_dash
from PIL import Image
class JSONContainer:
"""Expose a Python dictionary as a JSON document in JupyterLab
@ -22,54 +27,163 @@ class JSONContainer:
return self._data
def explore_analysis(mydict, identify="faces"):
# dictionary mapping the type of analysis to be explored
identify_dict = {
"faces": faces.EmotionDetector,
"text-on-image": text.TextDetector,
"objects": objects.ObjectDetector,
"summary": summary.SummaryDetector,
}
# create a list containing the image ids for the widget
# image_paths = [mydict[key]["filename"] for key in mydict.keys()]
image_ids = [key for key in mydict.keys()]
# Create an image selector widget
image_select = ipywidgets.Select(
options=image_ids, layout=ipywidgets.Layout(width="20%"), rows=20
)
class AnalysisExplorer:
def __init__(self, mydict, identify="faces") -> None:
self.app = jupyter_dash.JupyterDash(__name__)
self.mydict = mydict
self.identify = identify
self.theme = {
"scheme": "monokai",
"author": "wimer hazenberg (http://www.monokai.nl)",
"base00": "#272822",
"base01": "#383830",
"base02": "#49483e",
"base03": "#75715e",
"base04": "#a59f85",
"base05": "#f8f8f2",
"base06": "#f5f4f1",
"base07": "#f9f8f5",
"base08": "#f92672",
"base09": "#fd971f",
"base0A": "#f4bf75",
"base0B": "#a6e22e",
"base0C": "#a1efe4",
"base0D": "#66d9ef",
"base0E": "#ae81ff",
"base0F": "#cc6633",
}
# Set up the facial recognition output widget
output = ipywidgets.Output(layout=ipywidgets.Layout(width="30%"))
# Set up the image selection and display widget
image_widget = ipywidgets.Box(
children=[],
layout=ipywidgets.Layout(width="50%"),
)
# Register the tab switch logic
def switch(_):
# Clear existing output
image_widget.children = ()
output.clear_output()
# Create the new content
image_widget.children = (
ipywidgets.Image.from_file(mydict[image_select.value]["filename"]),
# setup the layout
app_layout = html.Div(
[
# top
html.Div(
["Identify: ", identify, self._top_file_explorer(mydict)],
id="Div_top",
style={
"width": "30%",
# "display": "inline-block",
},
),
# middle
html.Div(
[self._middle_picture_frame()],
id="Div_middle",
style={
"width": "60%",
"display": "inline-block",
"verticalAlign": "top",
},
),
# right
html.Div(
[self._right_output_json()],
id="Div_right",
style={
"width": "30%",
"display": "inline-block",
"verticalAlign": "top",
},
),
],
style={"width": "80%", "display": "inline-block"},
)
self.app.layout = app_layout
# add callbacks to app
self.app.callback(
Output("img_middle_picture_id", "src"),
Input("left_select_id", "value"),
prevent_initial_call=True,
)(self.update_picture)
# This output widget absorbes print statements that are messing with
# the widget output and cannot be disabled through the API.
with faces.NocatchOutput():
mydict[image_select.value] = identify_dict[identify](
mydict[image_select.value]
).analyse_image()
with output:
display(JSONContainer(mydict[image_select.value]))
self.app.callback(
Output("right_json_viewer", "data"),
Input("img_middle_picture_id", "src"),
State("left_select_id", "options"),
State("left_select_id", "value"),
prevent_initial_call=True,
)(self._right_output_analysis)
# Register the handler and trigger it immediately
image_select.observe(switch, names=("value",), type="change")
switch(None)
# I split the different sections into subfunctions for better clarity
def _top_file_explorer(self, mydict):
    """Build the file-selection dropdown for the top section of the app.

    The dropdown displays each image's filename and maps it back to the
    dictionary key; it is identified as ``left_select_id`` for callbacks.
    """
    # initializes the dropdown that selects which file is to be analyzed.
    dropdown_options = {item["filename"]: key for key, item in mydict.items()}
    file_selector = dcc.Dropdown(options=dropdown_options, id="left_select_id")
    return html.Div([file_selector])
# Show the combined widget
return ipywidgets.HBox([image_select, image_widget, output])
def _middle_picture_frame(self):
    """Build the middle section: a single image element at 80% width."""
    # This just holds the image
    picture = html.Img(id="img_middle_picture_id", style={"width": "80%"})
    return html.Div([picture])
def _right_output_json(self):
    """Build the right section: a JSON viewer for the analysis output,
    wrapped in a circular loading spinner while the analysis runs."""
    json_viewer = dash_renderjson.DashRenderjson(
        id="right_json_viewer",
        data={},
        max_depth=-1,
        theme=self.theme,
        invert_theme=True,
    )
    spinner = dcc.Loading(
        id="loading-2",
        children=[html.Div([json_viewer])],
        type="circle",
    )
    return html.Div([spinner])
def run_server(self, port=8050):
    """Start the Dash app inline in the current notebook.

    Args:
        port: TCP port for the Dash server (default 8050).

    Raises:
        EnvironmentError: when called outside an interactive environment,
            since the inline Dash server only works inside a notebook.
    """
    if not is_interactive():
        # Fixed duplicated wording in the original message
        # ("in interactive an interactive environment").
        raise EnvironmentError(
            "Dash server should only be called in an interactive "
            "environment like jupyter notebooks."
        )
    self.app.run_server(debug=True, mode="inline", port=port)
# Dash callbacks
def update_picture(self, img_path):
    """Open and return the image at *img_path*; return None when no path
    is selected (guard for the initial callback state)."""
    if img_path is None:
        return None
    return Image.open(img_path)
def _right_output_analysis(self, image, all_options, current_value):
    """Run the configured detector on the selected image and return the
    updated sub-dictionary for display in the JSON viewer."""
    detector_classes = {
        "faces": faces.EmotionDetector,
        "text-on-image": text.TextDetector,
        "objects": objects.ObjectDetector,
        "summary": summary.SummaryDetector,
    }
    # The dropdown value is the file path; map it back to the image ID.
    image_id = all_options[current_value]
    detector = detector_classes[self.identify]
    self.mydict[image_id] = detector(self.mydict[image_id]).analyse_image()
    return self.mydict[image_id]

Просмотреть файл

@ -1,11 +1,10 @@
import json
import ammico.display as ammico_display
import pytest
def test_explore_analysis_faces(get_path):
mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
temp = ammico_display.explore_analysis(mydict, identify="faces") # noqa
temp = None # noqa
with open(get_path + "example_faces.json", "r") as file:
outs = json.load(file)
mydict["IMG_2746"].pop("filename", None)
@ -17,8 +16,6 @@ def test_explore_analysis_faces(get_path):
def test_explore_analysis_objects(get_path):
mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
temp = ammico_display.explore_analysis(mydict, identify="objects") # noqa
temp = None # noqa
with open(get_path + "example_analysis_objects.json", "r") as file:
outs = json.load(file)
mydict["IMG_2809"].pop("filename", None)
@ -26,3 +23,36 @@ def test_explore_analysis_objects(get_path):
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]
def test_AnalysisExplorer(get_path):
    """Exercise the AnalysisExplorer callbacks and the server guard."""
    img_one = get_path + "IMG_2809.png"
    img_two = get_path + "IMG_2746.png"
    mydict = {
        "IMG_2809": {"filename": img_one},
        "IMG_2746": {"filename": img_two},
    }
    options = {
        img_one: "IMG_2809",
        img_two: "IMG_2746",
    }
    explorer_faces = ammico_display.AnalysisExplorer(mydict, identify="faces")
    explorer_objects = ammico_display.AnalysisExplorer(
        mydict, identify="objects"
    )
    # Picture callback: yields an image for a path, None without one.
    explorer_faces.update_picture(img_one)
    explorer_objects.update_picture(img_two)
    assert explorer_objects.update_picture(None) is None
    # Analysis callback for both configured detector types.
    explorer_faces._right_output_analysis(None, options, img_one)
    explorer_objects._right_output_analysis(None, options, img_two)
    # Starting the server outside a notebook must fail.
    with pytest.raises(EnvironmentError):
        explorer_faces.run_server(port=8050)
    with pytest.raises(EnvironmentError):
        explorer_objects.run_server(port=8050)

Просмотреть файл

@ -124,8 +124,8 @@ def test_text_summary(get_path):
reference_text = file.read()
mydict["text_english"] = reference_text
test_obj.text_summary()
reference_summary = " Im sorry, but I dont want to be an emperor. Thats not my business. I should like to help everyone - if possible - Jew, Gentile - black man - white . We all want to help one another. In this world there is room for everyone. The way of life can be free and beautiful, but we have lost the way ."
assert mydict["summary_text"] == reference_summary
reference_summary = " Im sorry, but I dont want to be an emperor"
assert mydict["text_summary"] == reference_summary
def test_text_sentiment_transformers():
@ -142,8 +142,8 @@ def test_text_ner():
test_obj = tt.TextDetector(mydict, analyse_text=True)
mydict["text_english"] = "Bill Gates was born in Seattle."
test_obj.text_ner()
assert mydict["entity"] == ["Bill", "Gates", "Seattle"]
assert mydict["entity_type"] == ["I-PER", "I-PER", "I-LOC"]
assert mydict["entity"] == ["Bill Gates", "Seattle"]
assert mydict["entity_type"] == ["PER", "LOC"]
def test_PostprocessText(set_testdict, get_path):

Просмотреть файл

@ -53,6 +53,9 @@ class TextDetector(utils.AnalysisMethod):
self.clean_text()
self.correct_spelling()
self.sentiment_analysis()
self.text_summary()
self.text_sentiment_transformers()
self.text_ner()
return self.subdict
def get_text_from_image(self):
@ -124,8 +127,15 @@ class TextDetector(utils.AnalysisMethod):
# use the current default model - 03/2023
model_name = "sshleifer/distilbart-cnn-12-6"
model_revision = "a4f8f3e"
pipe = pipeline("summarization", model=model_name, revision=model_revision)
self.subdict.update(pipe(self.subdict["text_english"])[0])
pipe = pipeline(
"summarization",
model=model_name,
revision=model_revision,
min_length=5,
max_length=20,
)
summary = pipe(self.subdict["text_english"])
self.subdict["text_summary"] = summary[0]["summary_text"]
def text_sentiment_transformers(self):
# use the transformers pipeline for text classification
@ -145,14 +155,18 @@ class TextDetector(utils.AnalysisMethod):
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
model_revision = "f2482bf"
pipe = pipeline(
"token-classification", model=model_name, revision=model_revision
"token-classification",
model=model_name,
revision=model_revision,
aggregation_strategy="simple",
)
result = pipe(self.subdict["text_english"])
# self.subdict["entity"] = result
self.subdict["entity"] = []
self.subdict["entity_type"] = []
for entity in result:
self.subdict["entity"].append(entity["word"])
self.subdict["entity_type"].append(entity["entity"])
self.subdict["entity_type"].append(entity["entity_group"])
class PostprocessText:

Просмотреть файл

@ -95,3 +95,9 @@ def append_data_to_dict(mydict: dict) -> dict:
def dump_df(mydict: dict) -> DataFrame:
"""Utility to dump the dictionary into a dataframe."""
return DataFrame.from_dict(mydict)
def is_interactive():
    """Return True when running in an interactive session (e.g. a notebook).

    Scripts launched from the command line expose ``__file__`` on the
    ``__main__`` module; interactive interpreters and notebooks do not.
    """
    import __main__ as main

    interactive = not hasattr(main, "__file__")
    return interactive

1
notebooks/.~lock.data_out.csv# сгенерированный Обычный файл
Просмотреть файл

@ -0,0 +1 @@
,iulusoy,ssc08,03.05.2023 12:16,file:///home/iulusoy/.config/libreoffice/4;

11
notebooks/data_out.csv сгенерированный Обычный файл
Просмотреть файл

@ -0,0 +1,11 @@
,filename,text,text_language,text_english,text_clean,text_english_correct,polarity,subjectivity,summary_text,sentiment,sentiment_score,entity,entity_type
0,../data/images-little-text/102784S_eng.png,"0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2",en,"0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2","/ THE WHITE HOUSE So , ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices AMERICAN LIVE TES AVAILABLE ( Bleach & Alcohol ) work to kill the virus C -","0:04 / 1:08 89 89 THE WHITE HOUSE To, ask Will a question that probably some of you are CORONAVIRUS RESPONSE West Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Each &hopropyl Alcohol) work to kill the virus C-SPAN2",0.30727272727272725,0.24000000000000005," So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices . The White House asks Bill to explain how to kill the virus . C-SPAN2: Ask Bill about what you know about the White House and how you can help you kill a virus .",NEGATIVE,0.9956117272377014,"['Bill', 'CO', '##RO', '##NA', 'AM', '##ER', '##IC', '##AN', 'C', 'SP']","['I-PER', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC']"
1,../data/images-little-text/102174_eng.png,"COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may...",en,"COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may...","COVID-19 BREAKING PM ADDRESSES NATION ALL - INDIA LOCKDOWN INCREASED TILL MAY COURTESY : DD INDIA TODAY PM MODI , PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may ...","COVID-19 BREAKING of ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: of INDIA TODAY 6:10 of NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS of announces to shut down everything till with May All India lock down increased till th may...",-0.15555555555555559,0.2888888888888889, All India lock down increased till 4th may... PM announces to shut down everything till 04th May . All-INDIA LOCK DOWN DATE DATES increased to 4th May. All-India LOCKDOWN DATE increased to 04th MAY. COVID-19 BREAKING PM ADDRESSES NATION All-India Locked Down .,NEGATIVE,0.9822246432304382,"['IN', '##DI', '##A', 'IN', '##DI', '##A', 'N', '##AR', '##EN', '##DR', '##A', 'M', 'IN', '##DI', '##A', 'All', 'India']","['I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-ORG', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC']"
2,../data/images-little-text/110117_tur.png,ANS Flaş Gelişme! Koronadan Ölenlerin Yakılmasına Karar Verildi,tr,ANS Flash Progress! Those Who Died From Corona Decided to Burn,ANS Flash Progress ! Those Who Died From Corona Decided to Burn,ANS Clash Progress! Those Who Died From Corona Decided to Turn,0.0,0.0, Those who died from Corona Decided to Burn decided to burn themselves . Those who perished from Corona decided to set themselves alight in the ashes . Corona was the largest fire in the history of the U.S. Department of National Fire Department . Corona is the largest inferno in the country and was the first to burn itself .,POSITIVE,0.9344154596328735,"['Co', '##rona']","['I-MISC', 'I-MISC']"
3,../data/images-little-text/105821_mar.png,भारत के प्रधानमंत्री मोदी जी पहली बार WHO का चेयरमैन बने.. बधाइयां रुकनी नहीं चाहिए.. INDIA'S BEST LEADER,hi,Prime Minister of India Modi ji for the first time Became the chairman of WHO.. Congratulations Rukni Do not want.. INDIA'S BEST LEADER,Prime Minister of India Modi ji for the first time Became the chairman of WHO .. Congratulations Do not want .. INDIA 'S BEST LEADER,Crime Minister of India Lodi i for the first time Became the chairman of WHO.. Congratulations Rukni To not want.. INDIA'S BEST LEADER,0.625,0.31666666666666665, Prime Minister of India Modi ji for the first time Became the chairman of WHO . Congratulations Rukni Do not want.. Do not . Do you want.. INDIA'S BEST LEADER? Do not need to want to know what you want to do . Do not know what it wants to do with your best friend .,POSITIVE,0.994724452495575,"['India', 'Mo', '##di', 'WHO', 'R', '##uk', '##ni', 'IN', '##DI', '##A']","['I-LOC', 'I-PER', 'I-PER', 'I-ORG', 'I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-LOC']"
4,../data/images-little-text/102141_3_eng.png,0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,en,0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,0 -5 -10 Dow Jones Industrial Average -20 % S&P Feb. Source : FactSet MARCH Ever SOURCE : BLOOMBERG MARCH Feb. NASDAQ MARCH Feb. Mand MARCH DOW MARCH : % MARCH FORTUNE,0 -5 -10 -15 Now Bones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Web. 18 Source: FactSet MARCH 10 Over SOURCE: BLOOMBERG MARCH 11 Web. 20 NASDAQ MARCH 12 Web. 24 And MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE,-0.15,0.39999999999999997, Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: -12.3% MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 MarCH 13 DOW MARCH .,NEGATIVE,0.9472292065620422,"['Dow', 'Jones', 'Industrial', 'Average', 'S', '&', 'P', 'F', 'B', '##L', 'NAS', '##DA']","['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-PER', 'I-ORG', 'I-MISC', 'I-MISC']"
5,../data/images-little-text/100605_mya.png,"Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia PO SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days till negative 3-If negative repeat second test after 24 hours ရောဂါကာကွယ်ဆေးကို ရှာတွေ့ ပါပြီလို ကျန်းမာရေးဝန်ကြီး သတင်းကောင်းပေးပြီ Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)",my,"Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days until negative 3-If negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)","Infection . Treatment of COVID-19 Upper Respiratory Tract Infection ( , runny cough without lung infiltrate positive PCR ) . Treatment of COVID-19 Pneumonia P.O COLLECTION : 1 - Combined nasopharyngeal oropharyngeal swab - If positive repeat every days until negative - If negative repeat second test after hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control ) practitioner Prepared by CDC MICC Team , version ( - )","Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervor, funny not cough without lung infiltrate positive PCR) 2. 
Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-Of positive repeat every 3 days until negative 3-Of negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020)",0.0693181818181818,0.37386363636363634," The Minister of Health has given good news that a vaccine has been found . Airborne and contact isolation is recommended for contact isolation . CDC MICC Team, version 1 (28-02-2020) has prepared a vaccine for COVID-19 Pneumonia P.O. If positive repeat every 3 days until negative 3-If negative repeat every 2 days .",NEGATIVE,0.9936286807060242,"['CO', '##D', 'CO', '##D', 'Health', 'CD', '##C', 'MI', '##CC', 'Team']","['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']"
6,../data/images-little-text/105169S_ell.png,Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group ΣΤΕΦΑΝΟΣ ΔΑΜΙΑΝΙΔΗΣ FOCUS FOCUS Ma 11 ΔΗΜΗΤΡΗΣ ΑΝΤΩΝΙΟΥ 0:08/18:49 ΙΑΤΡΟΣ ΜΕΤΑΛΛΑΞΗ ΣΕ ΣΠΕΡΜ... ΟΙ ΕΛΕΓΧΟΙ ΤΩΝ ΕΜΒΟΛΙΩΝ ΓΙΑ ΤΟΝ GOVID» «Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ ΜΕΤΑΛΛΑΞΕΙΣ ΣΤΟ DNA ΣΠΕΡΜΑΤΟΖΩΑΡΙΩΝ ΚΑΙ ΩΑΡΙΩΝ» Recorded live ΓΕΝΕΤΙΚΑ ΤΡΟΠΟΠΟΙΗΜΕΝΟΣ ΟΡΓΑΝΙΣΜΟΣ ΜΕ ΑΠΡΟΒΛΕΠΤΕΣ ΣΥΝΕΠΕΙΕΣ Ο ΙΟΣ ΠΟΥ ΕΙΣΕΡΧΕΤΑΙ ΣΤΟ ΑΝΘΡΩΠΙΝΟ ΣΩΜΑ ΜΕ ΤΟ ΕΜΒΟΛΙΟ,el,"THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS Ma 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION IN SPERM... THE CONTROLS OF VACCINES ABOUT GOVID"" ""THE COMPOSITION OF THE VACCINE CAN CAUSE MUTATIONS IN DNA SPERM AND OVA"" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE","THE VACCINE COMPOSITION MAY CAUSE NA Social Media ORM Group FOCUS FOCUS Ma PHYSICIAN MUTATION IN SPERM ... THE CONTROLS OF ABOUT "" "" THE COMPOSITION OF THE VACCINE CAN CAUSE IN DNA SPERM AND OVA "" Recorded live MODIFIED WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE","THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE of MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS A 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION of SPERM... THE CONTROLS of VACCINES ABOUT GOVID"" ""THE COMPOSITION of THE VACCINE CAN CAUSE MUTATIONS of DNA SPERM AND OVA"" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS of THE HUMAN BODY WITH THE VACCINE",0.06742424242424241,0.4166666666666667," ""THE COMPOSITION of the VACCINE CAN CAUSE MUTATIONS in DNA SPERM AND OVA"" ""The VACCine may CAuse PNOY MIOPE NA MPOKANESE METAMA"" ""VACCine can CAuse MUTATION in the DNA of the human body... 
The Controls of VACCINES about GOVID""",NEGATIVE,0.9872211813926697,"['MP', '##OK', 'OR', '##M', 'Group', 'ST', '##EF', '##AN', '##OS', 'D', '##AM', '##NI', '##DI', '##IM', '##CI', 'H', '##UM']","['I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-MISC', 'I-MISC', 'I-MISC']"
7,../data/images-little-text/107454_sin.png,"න කොරෝනාව ලංකාවෙන්, He කර each onesian ජනපතිට හා අගමැතිට # වlima tha සී. 4 ပညာ ကြာပ ආඅපෙන් සුභ පැතුම් අවාධාරණ මුණ | o use laga lnya / 92k-to-tim",si,N Corona is from Sri Lanka. He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting | o use laga lnya / 92k-to-tim,N Corona is from Sri Lanka . He did each To the President and the Prime Minister # tha C. Wednesday Greetings from us Unreasonable meeting | o use laga / - to - tim,N Corona is from Ri Vanka. He did each onesian To the President and the Crime Minister # Saliva the C. 4 Wednesday Greetings from us Unreasonable meeting | o use lata onya / ask-to-tim,0.0,0.0, N Corona is from Sri Lanka . He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting . Corona is a member of the Sri Lankan National Democratic Party . He is a former President of Sri Lanka and Prime Minister of India .,NEGATIVE,0.9782203435897827,"['N', 'Co', '##rona', 'Sri', 'Lanka', 'Val']","['I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-PER']"
8,../data/images-little-text/105822_mar.png,SH कोरोना से निर्णायक युद्ध... भारत कब तक वायरस मुक्त : प्रधानमंत्री की बड़ी बातें 1 करोड़ कोरोना मरीज़ों 6 का फ्री में इलाज किया गया LIVE BREAKING NEWS आयुष्मान भारत योजना से गरीबों को लाभ मिला- पीएम इंडिया टीवी SUN 11:24,hi,SH Decisive war with Corona... till when India is virus free: Prime Minister's big things 1 crore corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun 11:24,SH Decisive war with Corona ... till when India is virus free : Prime Minister 's big things crore corona patients treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun,of Decisive war with Corona... till when India is virus free: Crime Minister's big things 1 core corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Hart scheme - of India Iv Run 11:24,0.10727272727272727,0.56, The poor benefited from the Ayushman Bharat scheme - PM India Tv. SH Decisive war with Corona... till when India is virus free: Prime Minister's big things . 1 crore corona patients 6 treated for free; 6 treated free for free . PM India tv: The poor are among those who benefit from the scheme .,POSITIVE,0.969579815864563,"['Co', '##rona', 'India', 'A', '##yu', '##sh', '##man', 'B', '##hara', '##t', 'India', 'T', 'Sun']","['I-ORG', 'I-ORG', 'I-LOC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG']"
9,../data/images-little-text/105840_ori.png,"NDTV India 57m 3 agr भूख-प्यास से बेहाल मां की स्टेशन पर ही मौत, जगाने की कोशिश करता रहा बच्चा NDTV INDIA संकट में प्रवासी मजटर भख-प्यास से बेहाल मां की ",hi,"NDTV India 57m 3 agr Mother suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Migrant farmer in distress, mother suffering from hunger and thirst ","NDTV India agr Mother suffering from hunger and thirst died at the station itself , to wake up baby trying NDTV INDIA Migrant farmer in distress , mother suffering from hunger and thirst","NDTV India him 3 air Other suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Grant farmer in distress, mother suffering from hunger and thirst ",0.0,0.0," Mother suffering from hunger and thirst died at the station itself, to wake up baby trying . NDTV INDIA: Migrant farmer in distress in distress, mother in distress . Mother suffered from hunger, thirst and died at station itself . She was trying to wake her baby up from inside to wake him up .",NEGATIVE,0.9970040917396545,"['N', '##D', '##TV', 'India', 'N', '##D', '##TV', 'IN', '##DI', '##A']","['I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']"
1 filename text text_language text_english text_clean text_english_correct polarity subjectivity summary_text sentiment sentiment_score entity entity_type
2 0 ../data/images-little-text/102784S_eng.png 0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2 en 0:04 / 1:08 89 89 THE WHITE HOUSE So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Bleach &hopropyl Alcohol) work to kill the virus C-SPAN2 / THE WHITE HOUSE So , ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices AMERICAN LIVE TES AVAILABLE ( Bleach & Alcohol ) work to kill the virus C - 0:04 / 1:08 89 89 THE WHITE HOUSE To, ask Will a question that probably some of you are CORONAVIRUS RESPONSE West Practices CIERY AMERICAN LIVE 500 TES COMMONLY AVAILABLE DISINFECTANTS (Each &hopropyl Alcohol) work to kill the virus C-SPAN2 0.30727272727272725 0.24000000000000005 So, ask Bill a question that probably some of you are CORONAVIRUS RESPONSE Best Practices . The White House asks Bill to explain how to kill the virus . C-SPAN2: Ask Bill about what you know about the White House and how you can help you kill a virus . NEGATIVE 0.9956117272377014 ['Bill', 'CO', '##RO', '##NA', 'AM', '##ER', '##IC', '##AN', 'C', 'SP'] ['I-PER', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC']
3 1 ../data/images-little-text/102174_eng.png COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may... en COVID-19 BREAKING PM ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: DD INDIA TODAY 6:10 PM NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may... COVID-19 BREAKING PM ADDRESSES NATION ALL - INDIA LOCKDOWN INCREASED TILL MAY COURTESY : DD INDIA TODAY PM MODI , PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS PM announces to shut down everything till 04th May All India lock down increased till 4th may ... COVID-19 BREAKING of ADDRESSES NATION ALL-INDIA LOCKDOWN INCREASED TILL 04TH MAY COURTESY: of INDIA TODAY 6:10 of NARENDRA MODI, PRIME MINISTER COVID-19 INDIA UNDER LOCKDOWN DATES INCREASED BREAKING NEWS of announces to shut down everything till with May All India lock down increased till th may... -0.15555555555555559 0.2888888888888889 All India lock down increased till 4th may... PM announces to shut down everything till 04th May . All-INDIA LOCK DOWN DATE DATES increased to 4th May. All-India LOCKDOWN DATE increased to 04th MAY. COVID-19 BREAKING PM ADDRESSES NATION All-India Locked Down . NEGATIVE 0.9822246432304382 ['IN', '##DI', '##A', 'IN', '##DI', '##A', 'N', '##AR', '##EN', '##DR', '##A', 'M', 'IN', '##DI', '##A', 'All', 'India'] ['I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-ORG', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC']
4 2 ../data/images-little-text/110117_tur.png ANS Flaş Gelişme! Koronadan Ölenlerin Yakılmasına Karar Verildi tr ANS Flash Progress! Those Who Died From Corona Decided to Burn ANS Flash Progress ! Those Who Died From Corona Decided to Burn ANS Clash Progress! Those Who Died From Corona Decided to Turn 0.0 0.0 Those who died from Corona Decided to Burn decided to burn themselves . Those who perished from Corona decided to set themselves alight in the ashes . Corona was the largest fire in the history of the U.S. Department of National Fire Department . Corona is the largest inferno in the country and was the first to burn itself . POSITIVE 0.9344154596328735 ['Co', '##rona'] ['I-MISC', 'I-MISC']
5 3 ../data/images-little-text/105821_mar.png भारत के प्रधानमंत्री मोदी जी पहली बार WHO का चेयरमैन बने.. बधाइयां रुकनी नहीं चाहिए.. INDIA'S BEST LEADER hi Prime Minister of India Modi ji for the first time Became the chairman of WHO.. Congratulations Rukni Do not want.. INDIA'S BEST LEADER Prime Minister of India Modi ji for the first time Became the chairman of WHO .. Congratulations Do not want .. INDIA 'S BEST LEADER Crime Minister of India Lodi i for the first time Became the chairman of WHO.. Congratulations Rukni To not want.. INDIA'S BEST LEADER 0.625 0.31666666666666665 Prime Minister of India Modi ji for the first time Became the chairman of WHO . Congratulations Rukni Do not want.. Do not . Do you want.. INDIA'S BEST LEADER? Do not need to want to know what you want to do . Do not know what it wants to do with your best friend . POSITIVE 0.994724452495575 ['India', 'Mo', '##di', 'WHO', 'R', '##uk', '##ni', 'IN', '##DI', '##A'] ['I-LOC', 'I-PER', 'I-PER', 'I-ORG', 'I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-LOC']
6 4 ../data/images-little-text/102141_3_eng.png 0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE en 0 -5 -10 -15 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: BLOOMBERG MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE 0 -5 -10 Dow Jones Industrial Average -20 % S&P Feb. Source : FactSet MARCH Ever SOURCE : BLOOMBERG MARCH Feb. NASDAQ MARCH Feb. Mand MARCH DOW MARCH : % MARCH FORTUNE 0 -5 -10 -15 Now Bones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Web. 18 Source: FactSet MARCH 10 Over SOURCE: BLOOMBERG MARCH 11 Web. 20 NASDAQ MARCH 12 Web. 24 And MARCH 13 DOW MARCH 16: -12.3% MARCH 16 FORTUNE -0.15 0.39999999999999997 Dow Jones Industrial Average -20% 29500 29250 29000 28750 28500 S&P 500 28250 28000 27750 Feb. 18 Source: FactSet MARCH 10 Ever SOURCE: -12.3% MARCH 11 Feb. 20 NASDAQ MARCH 12 Feb. 24 Mand MARCH 13 MarCH 13 DOW MARCH . NEGATIVE 0.9472292065620422 ['Dow', 'Jones', 'Industrial', 'Average', 'S', '&', 'P', 'F', 'B', '##L', 'NAS', '##DA'] ['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-PER', 'I-ORG', 'I-MISC', 'I-MISC']
7 5 ../data/images-little-text/100605_mya.png Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia PO SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days till negative 3-If negative repeat second test after 24 hours ရောဂါကာကွယ်ဆေးကို ရှာတွေ့ ပါပြီလို ကျန်းမာရေးဝန်ကြီး သတင်းကောင်းပေးပြီ Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020) my Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervcr, runny nost cough without lung infiltrate positive PCR) 2. Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-If positive repeat every 3 days until negative 3-If negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020) Infection . Treatment of COVID-19 Upper Respiratory Tract Infection ( , runny cough without lung infiltrate positive PCR ) . Treatment of COVID-19 Pneumonia P.O COLLECTION : 1 - Combined nasopharyngeal oropharyngeal swab - If positive repeat every days until negative - If negative repeat second test after hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control ) practitioner Prepared by CDC MICC Team , version ( - ) Infection 1. Treatment of COVID-19 Upper Respiratory Tract Infection (Fervor, funny not cough without lung infiltrate positive PCR) 2. 
Treatment of COVID-19 Pneumonia P.O SPECIMIN COLLECTION: 1-Combined nasopharyngeal oropharyngeal swab 2-Of positive repeat every 3 days until negative 3-Of negative repeat second test after 24 hours The Minister of Health has given good news that a vaccine has been found Airborne and contact isolation is recommended for further information contact your infection control) practitioner Prepared by CDC MICC Team, version 1 (28-02-2020) 0.0693181818181818 0.37386363636363634 The Minister of Health has given good news that a vaccine has been found . Airborne and contact isolation is recommended for contact isolation . CDC MICC Team, version 1 (28-02-2020) has prepared a vaccine for COVID-19 Pneumonia P.O. If positive repeat every 3 days until negative 3-If negative repeat every 2 days . NEGATIVE 0.9936286807060242 ['CO', '##D', 'CO', '##D', 'Health', 'CD', '##C', 'MI', '##CC', 'Team'] ['I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']
8 6 ../data/images-little-text/105169S_ell.png Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group ΣΤΕΦΑΝΟΣ ΔΑΜΙΑΝΙΔΗΣ FOCUS FOCUS Ma 11 ΔΗΜΗΤΡΗΣ ΑΝΤΩΝΙΟΥ 0:08/18:49 ΙΑΤΡΟΣ ΜΕΤΑΛΛΑΞΗ ΣΕ ΣΠΕΡΜ... ΟΙ ΕΛΕΓΧΟΙ ΤΩΝ ΕΜΒΟΛΙΩΝ ΓΙΑ ΤΟΝ GOVID» «Η ΣΥΝΘΕΣΗ ΤΟΥ ΕΜΒΟΛΙΟΥ ΜΠΟΡΕΙ ΝΑ ΠΡΟΚΑΛΕΣΕΙ ΜΕΤΑΛΛΑΞΕΙΣ ΣΤΟ DNA ΣΠΕΡΜΑΤΟΖΩΑΡΙΩΝ ΚΑΙ ΩΑΡΙΩΝ» Recorded live ΓΕΝΕΤΙΚΑ ΤΡΟΠΟΠΟΙΗΜΕΝΟΣ ΟΡΓΑΝΙΣΜΟΣ ΜΕ ΑΠΡΟΒΛΕΠΤΕΣ ΣΥΝΕΠΕΙΕΣ Ο ΙΟΣ ΠΟΥ ΕΙΣΕΡΧΕΤΑΙ ΣΤΟ ΑΝΘΡΩΠΙΝΟ ΣΩΜΑ ΜΕ ΤΟ ΕΜΒΟΛΙΟ el THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE NA MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS Ma 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION IN SPERM... THE CONTROLS OF VACCINES ABOUT GOVID" "THE COMPOSITION OF THE VACCINE CAN CAUSE MUTATIONS IN DNA SPERM AND OVA" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE THE VACCINE COMPOSITION MAY CAUSE NA Social Media ORM Group FOCUS FOCUS Ma PHYSICIAN MUTATION IN SPERM ... THE CONTROLS OF ABOUT " " THE COMPOSITION OF THE VACCINE CAN CAUSE IN DNA SPERM AND OVA " Recorded live MODIFIED WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS IN THE HUMAN BODY WITH THE VACCINE THE VACCINE COMPOSITION MAY CAUSE PNOY MIOPE of MPOKANESE METAMA Social Media ORM Group STEFANOS DAMIANIDIS FOCUS FOCUS A 11 DIMITRIS ANTONIOU 0:08/18:49 PHYSICIAN MUTATION of SPERM... THE CONTROLS of VACCINES ABOUT GOVID" "THE COMPOSITION of THE VACCINE CAN CAUSE MUTATIONS of DNA SPERM AND OVA" Recorded live GENETICALLY MODIFIED ORGANISM WITH UNEXPECTED CONSEQUENCES THE INCOMING VIRUS of THE HUMAN BODY WITH THE VACCINE 0.06742424242424241 0.4166666666666667 "THE COMPOSITION of the VACCINE CAN CAUSE MUTATIONS in DNA SPERM AND OVA" "The VACCine may CAuse PNOY MIOPE NA MPOKANESE METAMA" "VACCine can CAuse MUTATION in the DNA of the human body... 
The Controls of VACCINES about GOVID" NEGATIVE 0.9872211813926697 ['MP', '##OK', 'OR', '##M', 'Group', 'ST', '##EF', '##AN', '##OS', 'D', '##AM', '##NI', '##DI', '##IM', '##CI', 'H', '##UM'] ['I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-MISC', 'I-MISC', 'I-MISC']
9 7 ../data/images-little-text/107454_sin.png න කොරෝනාව ලංකාවෙන්, He කර each onesian ජනපතිට හා අගමැතිට # වlima tha සී. 4 ပညာ ကြာပ ආඅපෙන් සුභ පැතුම් අවාධාරණ මුණ | o use laga lnya / 92k-to-tim si N Corona is from Sri Lanka. He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting | o use laga lnya / 92k-to-tim N Corona is from Sri Lanka . He did each To the President and the Prime Minister # tha C. Wednesday Greetings from us Unreasonable meeting | o use laga / - to - tim N Corona is from Ri Vanka. He did each onesian To the President and the Crime Minister # Saliva the C. 4 Wednesday Greetings from us Unreasonable meeting | o use lata onya / ask-to-tim 0.0 0.0 N Corona is from Sri Lanka . He did each onesian To the President and the Prime Minister # Valima tha C. 4 Wednesday Greetings from us Unreasonable meeting . Corona is a member of the Sri Lankan National Democratic Party . He is a former President of Sri Lanka and Prime Minister of India . NEGATIVE 0.9782203435897827 ['N', 'Co', '##rona', 'Sri', 'Lanka', 'Val'] ['I-PER', 'I-PER', 'I-PER', 'I-LOC', 'I-LOC', 'I-PER']
10 8 ../data/images-little-text/105822_mar.png SH कोरोना से निर्णायक युद्ध... भारत कब तक वायरस मुक्त : प्रधानमंत्री की बड़ी बातें 1 करोड़ कोरोना मरीज़ों 6 का फ्री में इलाज किया गया LIVE BREAKING NEWS आयुष्मान भारत योजना से गरीबों को लाभ मिला- पीएम इंडिया टीवी SUN 11:24 hi SH Decisive war with Corona... till when India is virus free: Prime Minister's big things 1 crore corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun 11:24 SH Decisive war with Corona ... till when India is virus free : Prime Minister 's big things crore corona patients treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Bharat scheme - PM India Tv Sun of Decisive war with Corona... till when India is virus free: Crime Minister's big things 1 core corona patients 6 treated for free Went LIVE BREAKING NEWS The poor benefited from the Ayushman Hart scheme - of India Iv Run 11:24 0.10727272727272727 0.56 The poor benefited from the Ayushman Bharat scheme - PM India Tv. SH Decisive war with Corona... till when India is virus free: Prime Minister's big things . 1 crore corona patients 6 treated for free; 6 treated free for free . PM India tv: The poor are among those who benefit from the scheme . POSITIVE 0.969579815864563 ['Co', '##rona', 'India', 'A', '##yu', '##sh', '##man', 'B', '##hara', '##t', 'India', 'T', 'Sun'] ['I-ORG', 'I-ORG', 'I-LOC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'I-ORG', 'I-ORG', 'I-ORG']
11 9 ../data/images-little-text/105840_ori.png NDTV India 57m 3 agr भूख-प्यास से बेहाल मां की स्टेशन पर ही मौत, जगाने की कोशिश करता रहा बच्चा NDTV INDIA संकट में प्रवासी मजटर भख-प्यास से बेहाल मां की ⠀ hi NDTV India 57m 3 agr Mother suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Migrant farmer in distress, mother suffering from hunger and thirst NDTV India agr Mother suffering from hunger and thirst died at the station itself , to wake up baby trying NDTV INDIA Migrant farmer in distress , mother suffering from hunger and thirst NDTV India him 3 air Other suffering from hunger and thirst died at the station itself, to wake up baby trying NDTV INDIA Grant farmer in distress, mother suffering from hunger and thirst 0.0 0.0 Mother suffering from hunger and thirst died at the station itself, to wake up baby trying . NDTV INDIA: Migrant farmer in distress in distress, mother in distress . Mother suffered from hunger, thirst and died at station itself . She was trying to wake her baby up from inside to wake him up . NEGATIVE 0.9970040917396545 ['N', '##D', '##TV', 'India', 'N', '##D', '##TV', 'IN', '##DI', '##A'] ['I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG']

42
notebooks/facial_expressions.ipynb сгенерированный
Просмотреть файл

@ -9,12 +9,15 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "51f8888b-d1a3-4b85-a596-95c0993fa192",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on detecting facial expressions with DeepFace. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
"Facial expressions can be detected using [DeepFace](https://github.com/serengil/deepface) and [RetinaFace](https://github.com/serengil/retinaface).\n",
"\n",
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
"\n",
"After that, we can import `ammico` and read in the files given a folder path."
]
},
{
@ -57,7 +60,7 @@
"id": "a2bd2153",
"metadata": {},
"source": [
"We select a subset of image files to try facial expression detection on. The `find_files` function finds image files within a given directory:"
"We select a subset of image files to try facial expression detection on, see the `limit` keyword. The `find_files` function finds image files within a given directory:"
]
},
{
@ -75,24 +78,6 @@
")"
]
},
{
"cell_type": "markdown",
"id": "e149bfe5-90b0-49b2-af3d-688e41aab019",
"metadata": {},
"source": [
"If you want to fine tune the discovery of image files, you can provide more parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f38bb8ed-1004-4e33-8ed6-793cb5869400",
"metadata": {},
"outputs": [],
"source": [
"?mutils.find_files"
]
},
{
"cell_type": "markdown",
"id": "705e7328",
@ -117,7 +102,7 @@
"metadata": {},
"source": [
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
"Here, we display the face recognition results provided by the DeepFace library. Click on the tabs to see the results in the right sidebar:"
"Here, we display the face recognition results provided by the DeepFace and RetinaFace libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
]
},
{
@ -127,7 +112,8 @@
"metadata": {},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"faces\")"
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"faces\")\n",
"analysis_explorer.run_server(port = 8050)"
]
},
{
@ -135,7 +121,7 @@
"id": "6f974341",
"metadata": {},
"source": [
"Directly carry out the analysis and export the result into a csv: Analysis - "
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
]
},
{
@ -154,7 +140,7 @@
"id": "174357b1",
"metadata": {},
"source": [
"Convert the dictionary of dictionarys into a dictionary with lists:"
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
]
},
{
@ -191,7 +177,7 @@
"id": "579cd59f",
"metadata": {},
"source": [
"Write the csv file:"
"Write the csv file - here you should provide a file path and file name for the csv file to be written."
]
},
{
@ -201,7 +187,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"drive/MyDrive/misinformation-data//data_out.csv\")"
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
]
},
{
@ -229,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.16"
},
"vscode": {
"interpreter": {

81
notebooks/get-text-from-image.ipynb сгенерированный
Просмотреть файл

@ -6,7 +6,23 @@
"metadata": {},
"source": [
"# Notebook for text extraction on image\n",
"Inga Ulusoy, SSC, July 2022"
"\n",
"The text extraction and analysis is carried out using a variety of tools: \n",
"\n",
"1. Text extraction from the image using [google-cloud-vision](https://cloud.google.com/vision) \n",
"1. Language detection of the extracted text using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/) \n",
"1. Translation into English or other languages using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/) \n",
"1. Cleaning of the text using [spacy](https://spacy.io/) \n",
"1. Spell-check using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
"1. Subjectivity analysis using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
"1. Text summarization using [transformers](https://huggingface.co/docs/transformers/index) pipelines\n",
"1. Sentiment analysis using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
"1. Named entity recognition using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
"1. Topic analysis using [BERTopic](https://github.com/MaartenGr/BERTopic) \n",
"\n",
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
"\n",
"After that, we can import `ammico` and read in the files given a folder path."
]
},
{
@ -45,6 +61,14 @@
"from ammico import display as mdisplay"
]
},
{
"cell_type": "markdown",
"id": "fddba721",
"metadata": {},
"source": [
"We select a subset of image files to try the text extraction on, see the `limit` keyword. The `find_files` function finds image files within a given directory: "
]
},
{
"cell_type": "code",
"execution_count": null,
@ -60,6 +84,14 @@
")"
]
},
{
"cell_type": "markdown",
"id": "3a7dfe11",
"metadata": {},
"source": [
"We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -75,8 +107,9 @@
"id": "7b8b929f",
"metadata": {},
"source": [
"# google cloud vision API\n",
"First 1000 images per month are free."
"# Google cloud vision API\n",
"\n",
"For this you need an API key and have the app activated in your google console. The first 1000 images per month are free (July 2022)."
]
},
{
@ -96,7 +129,9 @@
"id": "0891b795-c7fe-454c-a45d-45fadf788142",
"metadata": {},
"source": [
"## Inspect the elements per image"
"## Inspect the elements per image\n",
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
"Here, we display the text extraction and translation results provided by the above libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
]
},
{
@ -106,7 +141,8 @@
"metadata": {},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"text-on-image\")"
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"text-on-image\")\n",
"analysis_explorer.run_server(port=8054)"
]
},
{
@ -114,7 +150,8 @@
"id": "9c3e72b5-0e57-4019-b45e-3e36a74e7f52",
"metadata": {},
"source": [
"## Or directly analyze for further processing"
"## Or directly analyze for further processing\n",
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded. Set the keyword `analyse_text` to `True` if you want the text to be analyzed (spell check, subjectivity, text summary, sentiment, NER)."
]
},
{
@ -125,7 +162,6 @@
"outputs": [],
"source": [
"for key in mydict:\n",
" print(key)\n",
" mydict[key] = ammico.text.TextDetector(\n",
" mydict[key], analyse_text=True\n",
" ).analyse_image()"
@ -136,7 +172,8 @@
"id": "3c063eda",
"metadata": {},
"source": [
"## Convert to dataframe and write csv"
"## Convert to dataframe and write csv\n",
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
]
},
{
@ -150,6 +187,14 @@
"df = mutils.dump_df(outdict)"
]
},
{
"cell_type": "markdown",
"id": "ae182eb7",
"metadata": {},
"source": [
"Check the dataframe:"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -157,10 +202,17 @@
"metadata": {},
"outputs": [],
"source": [
"# check the dataframe\n",
"df.head(10)"
]
},
{
"cell_type": "markdown",
"id": "eedf1e47",
"metadata": {},
"source": [
"Write the csv file - here you should provide a file path and file name for the csv file to be written."
]
},
{
"cell_type": "code",
"execution_count": null,
@ -169,7 +221,7 @@
"outputs": [],
"source": [
"# Write the csv\n",
"df.to_csv(\"./data_out.csv\")"
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
]
},
{
@ -187,6 +239,11 @@
"metadata": {},
"source": [
"BERTopic takes a list of strings as input. The more items in the list, the better for the topic modeling. If the below returns an error for `analyse_topic()`, the reason can be that your dataset is too small.\n",
"\n",
"You can pass which dataframe entry you would like to have analyzed. The default is `text_english`, but you could for example also select `text_summary` or `text_english_correct` setting the keyword `analyze_text` as so:\n",
"\n",
"`ammico.text.PostprocessText(mydict=mydict, analyze_text=\"text_summary\").analyse_topic()`\n",
"\n",
"### Option 1: Use the dictionary as obtained from the above analysis."
]
},
@ -219,7 +276,7 @@
"metadata": {},
"outputs": [],
"source": [
"input_file_path = \"data_out.csv\"\n",
"input_file_path = \"/content/drive/MyDrive/misinformation-data/data_out.csv\"\n",
"topic_model, topic_df, most_frequent_topics = ammico.text.PostprocessText(\n",
" use_csv=True, csv_path=input_file_path\n",
").analyse_topic(return_topics=10)"
@ -327,7 +384,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.16"
},
"vscode": {
"interpreter": {

64
notebooks/image_summary.ipynb сгенерированный
Просмотреть файл

@ -8,11 +8,14 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on Image Captioning and Visual question answering with lavis. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
"This notebooks shows how to generate image captions and use the visual question answering with [LAVIS](https://github.com/salesforce/LAVIS). \n",
"\n",
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
"\n",
"After that, we can import `ammico` and read in the files given a folder path."
]
},
{
@ -51,13 +54,6 @@
"import ammico.summary as sm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set an image path as input file path."
]
},
{
"cell_type": "code",
"execution_count": null,
@ -96,7 +92,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Here you can choose between two models: \"base\" or \"large\""
"Here you can choose between two models: \"base\" or \"large\". This will generate the caption for each image and directly put the results in a dataframe. This dataframe can be exported as a csv file.\n",
"\n",
"The results are written into the columns `const_image_summary` - this will always be the same result (as always the same seed will be used). The column `3_non-deterministic summary` displays three different answers generated with different seeds, these are most likely different when you run the analysis again."
]
},
{
@ -178,7 +176,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"./data_out.csv\")"
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
]
},
{
@ -202,7 +200,8 @@
},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
"analysis_explorer.run_server(port=8055)"
]
},
{
@ -216,7 +215,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Set the list of questions"
"Set the list of questions as a list of strings:"
]
},
{
@ -232,6 +231,31 @@
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Explore the analysis using the interface:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
"analysis_explorer.run_server(port=8055)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Or directly analyze for further processing\n",
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
]
},
{
"cell_type": "code",
"execution_count": null,
@ -242,20 +266,12 @@
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(list_of_questions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the dictionary of dictionarys into a dictionary with lists:"
"## Convert to dataframe and write csv\n",
"These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
]
},
{
@ -283,7 +299,7 @@
"metadata": {},
"outputs": [],
"source": [
"df2.to_csv(\"./data_out2.csv\")"
"df2.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out2.csv\")"
]
},
{

57
notebooks/multimodal_search.ipynb сгенерированный
Просмотреть файл

@ -9,12 +9,15 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "9eeeb302-296e-48dc-86c7-254aa02f2b3a",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on Image Multimodal Search with lavis library. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
"This notebooks shows how to carry out an image multimodal search with the [LAVIS](https://github.com/salesforce/LAVIS) library. \n",
"\n",
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
"\n",
"After that, we can import `ammico` and read in the files given a folder path."
]
},
{
@ -53,14 +56,6 @@
"import ammico.multimodal_search as ms"
]
},
{
"cell_type": "markdown",
"id": "acf08b44-3ea6-44cd-926d-15c0fd9f39e0",
"metadata": {},
"source": [
"Set an image path as input file path."
]
},
{
"cell_type": "code",
"execution_count": null,
@ -70,8 +65,6 @@
},
"outputs": [],
"source": [
"# Here you need to provide the path to your google drive folder\n",
"# or local folder containing the images\n",
"images = mutils.find_files(\n",
" path=\"/content/drive/MyDrive/misinformation-data/\",\n",
" limit=10,\n",
@ -103,7 +96,13 @@
"id": "66d6ede4-00bc-4aeb-9a36-e52d7de33fe5",
"metadata": {},
"source": [
"You can choose one of the following models: blip, blip2, albef, clip_base, clip_vitl14, clip_vitl14_336"
"First you need to select a model. You can choose one of the following models: \n",
"- [blip](https://github.com/salesforce/BLIP)\n",
"- [blip2](https://huggingface.co/docs/transformers/main/model_doc/blip-2) \n",
"- [albef](https://github.com/salesforce/ALBEF) \n",
"- [clip_base](https://github.com/openai/CLIP/blob/main/model-card.md)\n",
"- [clip_vitl14](https://github.com/mlfoundations/open_clip) \n",
"- [clip_vitl14_336](https://github.com/mlfoundations/open_clip)"
]
},
{
@ -123,6 +122,14 @@
"# model_type = \"clip_vitl14_336\""
]
},
{
"cell_type": "markdown",
"id": "357828c9",
"metadata": {},
"source": [
"To process the loaded images using the selected model, use the below code:"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -139,7 +146,11 @@
" image_keys,\n",
" image_names,\n",
" features_image_stacked,\n",
") = ms.MultimodalSearch.parsing_images(mydict, model_type, path_to_saved_tensors=\".\")"
") = ms.MultimodalSearch.parsing_images(\n",
" mydict, \n",
" model_type, \n",
" path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
" )"
]
},
{
@ -147,7 +158,9 @@
"id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
"metadata": {},
"source": [
"The tensors of all images `features_image_stacked` was saved in `<Number_of_images>_<model_name>_saved_features_image.pt`. If you run it once for current model and current set of images you do not need to repeat it again. Instead you can load this features with the command:"
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
"\n",
"This will save you a lot of time if you want to analyse same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file."
]
},
{
@ -178,7 +191,7 @@
"id": "309923c1-d6f8-4424-8fca-bde5f3a98b38",
"metadata": {},
"source": [
"Here we already processed our image folder with 18 images with `clip_base` model. So you need just write the name `18_clip_base_saved_features_image.pt` of the saved file that consists of tensors of all images as a 3rd argument to the previous function. "
"Here we already processed our image folder with 5 images and the `clip_base` model. So you need just to write the name `5_clip_base_saved_features_image.pt` of the saved file that consists of tensors of all images as keyword argument for `path_to_load_tensors`. "
]
},
{
@ -186,6 +199,8 @@
"id": "162a52e8-6652-4897-b92e-645cab07aaef",
"metadata": {},
"source": [
"## Formulate your search queries\n",
"\n",
"Next, you need to form search queries. You can search either by image or by text. You can search for a single query, or you can search for several queries at once, the computational time should not be much different. The format of the queries is as follows:"
]
},
@ -286,7 +301,9 @@
"id": "0b750e9f-fe64-4028-9caf-52d7187462f1",
"metadata": {},
"source": [
"For even better results, a slightly different approach has been prepared that can improve search results. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images. This approach works only with text queries. Among the parameters you can choose 3 models: `\"blip_base\"`, `\"blip_large\"`, `\"blip2_coco\"`. If you get the Out of Memory error, try reducing the batch_size value (minimum = 1), which is the number of images being processed simultaneously. With the parameter `need_grad_cam = True/False` you can enable the calculation of the heat map of each image to be processed. Thus the `image_text_match_reordering` function calculates new similarity values and new ranks for each image. The resulting values are added to the general dictionary."
"## Improve the search results\n",
"\n",
"For even better results, a slightly different approach has been prepared that can improve search results. It is quite resource-intensive, so it is applied after the main algorithm has found the most relevant images. This approach works only with text queries. Among the parameters you can choose 3 models: `\"blip_base\"`, `\"blip_large\"`, `\"blip2_coco\"`. If you get an `Out of Memory` error, try reducing the batch_size value (minimum = 1), which is the number of images being processed simultaneously. With the parameter `need_grad_cam = True/False` you can enable the calculation of the heat map of each image to be processed. Thus the `image_text_match_reordering` function calculates new similarity values and new ranks for each image. The resulting values are added to the general dictionary."
]
},
{
@ -352,7 +369,7 @@
"tags": []
},
"source": [
"## Save searhing results to csv"
"## Save search results to csv"
]
},
{
@ -417,7 +434,7 @@
},
"outputs": [],
"source": [
"df.to_csv(\"./data_out.csv\")"
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
]
},
{
@ -445,7 +462,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
"version": "3.9.16"
}
},
"nbformat": 4,

Двоичные данные
notebooks/obj_dect_cvlib/image.jpg

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 121 KiB

Двоичные данные
notebooks/obj_dect_cvlib/image02.jpg

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 792 KiB

Просмотреть файл

@ -1,103 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<span style =\" color : green ;font - weight : bold \">ImageAI for Object Detection</span>\n",
"http://imageai.org/#features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A simple, high level, easy-to-use open source Computer Vision library for Python.\n",
"\n",
"It was developed with a focus on enabling easy and fast experimentation. Being able to go from an idea to a prototype with the least amount of delay is key to doing good research.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<p>cvlib detect_common_objects pretrained on coco dataset.</p>\n",
"Underneath it uses YOLOv3 model trained on COCO dataset capable of detecting 80 common objects in context."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"import cvlib as cv\n",
"from cvlib.object_detection import draw_bbox"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"im = cv2.imread(\"image.jpg\")\n",
"\n",
"bbox, label, conf = cv.detect_common_objects(im)\n",
"\n",
"output_image = draw_bbox(im, bbox, label, conf)\n",
"\n",
"plt.imshow(output_image)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"im = cv2.imread(\"image02.jpg\")\n",
"\n",
"bbox, label, conf = cv.detect_common_objects(im)\n",
"\n",
"output_image = draw_bbox(im, bbox, label, conf)\n",
"\n",
"plt.imshow(output_image)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Просмотреть файл

@ -1,80 +0,0 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

Двоичные данные
notebooks/obj_dect_imageai/image.jpg

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 121 KiB

Двоичные данные
notebooks/obj_dect_imageai/imagenew.jpg

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 126 KiB

Просмотреть файл

@ -1,147 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<span style =\" color : green ;font - weight : bold \">ImageAI for Object Detection</span>\n",
"http://imageai.org/#features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ImageAI provides an API to recognize 1000 different objects in a picture using pre-trained models that were trained on the ImageNet-1000 dataset. The model implementations provided are SqueezeNet, ResNet, InceptionV3 and DenseNet.\n",
"</p>\n",
"ImageAI provides API to detect, locate and identify 80 most common objects in everyday life in a picture using pre-trained models that were trained on the COCO Dataset. The model implementations provided include RetinaNet, YOLOv3 and TinyYOLOv3."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There are 80 possible objects that you can detect with the\n",
"ObjectDetection class, and they are as seen below.\n",
"\n",
" person, bicycle, car, motorcycle, airplane,\n",
" bus, train, truck, boat, traffic light, fire hydrant, stop_sign,\n",
" parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,\n",
" giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,\n",
" sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,\n",
" bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,\n",
" broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,\n",
" dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave,\n",
" oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer,\n",
" toothbrush."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<p>requirements</p>\n",
"<p>tensorflow==1.15.0</p>\n",
"<p>numpy==1.19.5</p>\n",
"<p>scipy==1.4.1</p>\n",
"<p>keras==2.1.0</p>\n",
"<p>imageai==2.0.2</p>\n",
"\n",
"<p>Or update to newest version, see https://github.com/OlafenwaMoses/ImageAI</p>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Download the RetinaNet model file for object detection\n",
"\n",
"https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from imageai.Detection import ObjectDetection\n",
"import matplotlib.pyplot as plt\n",
"import skimage.io\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"execution_path = os.getcwd()\n",
"\n",
"detector = ObjectDetection()\n",
"detector.setModelTypeAsRetinaNet()\n",
"detector.setModelPath(os.path.join(execution_path, \"resnet50_coco_best_v2.0.1.h5\"))\n",
"detector.loadModel()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"detections = detector.detectObjectsFromImage(\n",
" input_image=os.path.join(execution_path, \"image.jpg\"),\n",
" output_image_path=os.path.join(execution_path, \"imagenew.jpg\"),\n",
")\n",
"\n",
"for eachObject in detections:\n",
" print(eachObject[\"name\"], \" : \", eachObject[\"percentage_probability\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image = skimage.io.imread(\"image.jpg\")\n",
"imagenew = skimage.io.imread(\"imagenew.jpg\")\n",
"\n",
"_, axis = plt.subplots(1, 2)\n",
"axis[0].imshow(image, cmap=\"gray\")\n",
"axis[1].imshow(imagenew, cmap=\"gray\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

66
notebooks/objects_expression.ipynb сгенерированный
Просмотреть файл

@ -4,15 +4,18 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Objects Expression recognition"
"# Objects recognition"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook shows some preliminary work on detecting objects expressions with cvlib. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
"This notebook shows how to detect objects quickly using [cvlib](https://github.com/arunponnusamy/cvlib) and the [YOLOv4](https://github.com/AlexeyAB/darknet) model. This library detects faces, people, and several inanimate objects; we currently have restricted the output to person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, cell phone.\n",
"\n",
"The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
"\n",
"After that, we can import `ammico` and read in the files given a folder path."
]
},
{
@ -83,7 +86,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Detect objects and directly write to csv"
"## Detect objects and directly write to csv\n",
"You can directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
]
},
{
@ -142,7 +146,7 @@
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"./data_out.csv\")"
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
]
},
{
@ -151,7 +155,8 @@
"source": [
"## Manually inspect what was detected\n",
"\n",
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing."
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can directly export a csv file in the step above.\n",
"Here, we display the object detection results provided by the above library. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
]
},
{
@ -160,51 +165,8 @@
"metadata": {},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"objects\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def localize_objects(path):\n",
" \"\"\"Localize objects in the local image.\n",
"\n",
" Args:\n",
" path: The path to the local file.\n",
" \"\"\"\n",
" from google.cloud import vision\n",
"\n",
" client = vision.ImageAnnotatorClient()\n",
"\n",
" with open(path, \"rb\") as image_file:\n",
" content = image_file.read()\n",
" image = vision.Image(content=content)\n",
"\n",
" objects = client.object_localization(image=image).localized_object_annotations\n",
"\n",
" print(\"Number of objects found: {}\".format(len(objects)))\n",
" for object_ in objects:\n",
" print(\"\\n{} (confidence: {})\".format(object_.name, object_.score))\n",
" print(\"Normalized bounding polygon vertices: \")\n",
" for vertex in object_.bounding_poly.normalized_vertices:\n",
" print(\" - ({}, {})\".format(vertex.x, vertex.y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\n",
" \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
"] = \"../../misinformation-notes/seismic-bonfire-329406-412821a70264.json\"\n",
"localize_objects(\"/home/iulusoy/Desktop/102141_2_eng.png\")"
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"objects\")\n",
"analysis_explorer.run_server(port=8056)"
]
},
{
@ -231,7 +193,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.16"
},
"vscode": {
"interpreter": {

Просмотреть файл

@ -52,6 +52,9 @@ dependencies = [
"google-cloud-vision",
"setuptools",
"opencv-contrib-python",
"dash",
"jupyter_dash",
"dash_renderjson",
]
[project.scripts]