{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Image summary and visual question answering" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook shows some preliminary work on image captioning and visual question answering with LAVIS. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:04.169929Z", "iopub.status.busy": "2023-03-27T13:40:04.169438Z", "iopub.status.idle": "2023-03-27T13:40:52.411091Z", "shell.execute_reply": "2023-03-27T13:40:52.406428Z" } }, "outputs": [], "source": [ "import misinformation\n", "from misinformation import utils as mutils\n", "from misinformation import display as mdisplay\n", "import misinformation.summary as sm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set the path to the input image files." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:52.462300Z", "iopub.status.busy": "2023-03-27T13:40:52.460078Z", "iopub.status.idle": "2023-03-27T13:40:52.467981Z", "shell.execute_reply": "2023-03-27T13:40:52.467342Z" } }, "outputs": [], "source": [ "images = mutils.find_files(\n", " path=\"data/\",\n", " limit=10,\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:52.471724Z", "iopub.status.busy": "2023-03-27T13:40:52.471256Z", "iopub.status.idle": "2023-03-27T13:40:52.475006Z", "shell.execute_reply": "2023-03-27T13:40:52.474033Z" } }, "outputs": [], "source": [ "mydict = mutils.initialize_dict(images)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create captions for images and directly write to csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here you can choose between two models: \"base\" or \"large\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:52.479504Z", "iopub.status.busy": "2023-03-27T13:40:52.479062Z", "iopub.status.idle": "2023-03-27T13:40:53.542792Z", "shell.execute_reply": "2023-03-27T13:40:53.542001Z" } }, "outputs": [ { "ename": "AttributeError", "evalue": "module 'misinformation.utils' has no attribute 'load_model'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m summary_model, summary_vis_processors \u001b[38;5;241m=\u001b[39m \u001b[43mmutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# summary_model, 
summary_vis_processors = mutils.load_model(\"large\")\u001b[39;00m\n", "\u001b[0;31mAttributeError\u001b[0m: module 'misinformation.utils' has no attribute 'load_model'" ] } ], "source": [ "summary_model, summary_vis_processors = mutils.load_model(\"base\")\n", "# summary_model, summary_vis_processors = mutils.load_model(\"large\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:53.565834Z", "iopub.status.busy": "2023-03-27T13:40:53.565166Z", "iopub.status.idle": "2023-03-27T13:40:53.586111Z", "shell.execute_reply": "2023-03-27T13:40:53.585037Z" } }, "outputs": [ { "ename": "NameError", "evalue": "name 'summary_model' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m mydict:\n\u001b[1;32m 2\u001b[0m mydict[key] \u001b[38;5;241m=\u001b[39m sm\u001b[38;5;241m.\u001b[39mSummaryDetector(mydict[key])\u001b[38;5;241m.\u001b[39manalyse_image(\n\u001b[0;32m----> 3\u001b[0m \u001b[43msummary_model\u001b[49m, summary_vis_processors\n\u001b[1;32m 4\u001b[0m )\n", "\u001b[0;31mNameError\u001b[0m: name 'summary_model' is not defined" ] } ], "source": [ "for key in mydict:\n", " mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n", " summary_model, summary_vis_processors\n", " )" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "Convert the dictionary of dictionaries into a dictionary of lists:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:53.591283Z", "iopub.status.busy": "2023-03-27T13:40:53.590649Z", "iopub.status.idle": "2023-03-27T13:40:53.615153Z", "shell.execute_reply": 
"2023-03-27T13:40:53.614458Z" }, "tags": [] }, "outputs": [], "source": [ "outdict = mutils.append_data_to_dict(mydict)\n", "df = mutils.dump_df(outdict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the dataframe:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2023-03-27T13:40:53.619703Z", "iopub.status.busy": "2023-03-27T13:40:53.619195Z", "iopub.status.idle": "2023-03-27T13:40:53.648021Z", "shell.execute_reply": "2023-03-27T13:40:53.647135Z" } }, "outputs": [ { "data": { "text/html": [ "
| \n", " | filename | \n", "
|---|---|
| 0 | \n", "data/102730_eng.png | \n", "
| 1 | \n", "data/102141_2_eng.png | \n", "
| 2 | \n", "data/106349S_por.png | \n", "