зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-11-03 23:46:04 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			353 строки
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			353 строки
		
	
	
		
			8.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						|
 "cells": [
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "# Image summary and visual question answering"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "This notebook shows how to generate image captions and use visual question answering with [LAVIS](https://github.com/salesforce/LAVIS). \n",
 | 
						|
    "\n",
 | 
						|
    "The first cell is only run on Google Colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
 | 
						|
    "\n",
 | 
						|
    "After that, we can import `ammico` and read in the files given a folder path."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "# if running on google colab\n",
 | 
						|
    "# flake8-noqa-cell\n",
 | 
						|
    "import os\n",
 | 
						|
    "\n",
 | 
						|
    "if \"google.colab\" in str(get_ipython()):\n",
 | 
						|
    "    # update python version\n",
 | 
						|
    "    # install setuptools\n",
 | 
						|
    "    # %pip install setuptools==61 -qqq\n",
 | 
						|
    "    # install ammico\n",
 | 
						|
    "    %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
 | 
						|
    "    # mount google drive for data and API key\n",
 | 
						|
    "    from google.colab import drive\n",
 | 
						|
    "\n",
 | 
						|
    "    drive.mount(\"/content/drive\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "import ammico\n",
 | 
						|
    "from ammico import utils as mutils\n",
 | 
						|
    "from ammico import display as mdisplay\n",
 | 
						|
    "import ammico.summary as sm"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "# Here you need to provide the path to your google drive folder\n",
 | 
						|
    "# or local folder containing the images\n",
 | 
						|
    "images = mutils.find_files(\n",
 | 
						|
    "    path=\"data/\",\n",
 | 
						|
    "    limit=10,\n",
 | 
						|
    ")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "mydict = mutils.initialize_dict(images)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Create captions for images and directly write to csv"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Here you can choose between two models: \"base\" or \"large\". This will generate the caption for each image and directly put the results in a dataframe. This dataframe can be exported as a csv file.\n",
 | 
						|
    "\n",
 | 
						|
    "The results are written into the column `const_image_summary` - this will always be the same result (as the same seed is always used). The column `3_non-deterministic summary` displays three different answers generated with different seeds; these will most likely differ when you run the analysis again."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "obj = sm.SummaryDetector(mydict)\n",
 | 
						|
    "summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
 | 
						|
    "# summary_model, summary_vis_processors = obj.load_model(model_type=\"large\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "for key in mydict:\n",
 | 
						|
    "    mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
 | 
						|
    "        summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
 | 
						|
    "    )"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "source": [
 | 
						|
    "Convert the dictionary of dictionaries into a dictionary with lists:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "outdict = mutils.append_data_to_dict(mydict)\n",
 | 
						|
    "df = mutils.dump_df(outdict)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Check the dataframe:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "df.head(10)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Write the csv file:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "df.to_csv(\"data_out.csv\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Manually inspect the summaries\n",
 | 
						|
    "\n",
 | 
						|
    "To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this manual inspection.\n",
 | 
						|
    "\n",
 | 
						|
    "`const_image_summary` - the permanent summary, which does not change from run to run (`analyse_image`).\n",
 | 
						|
    "\n",
 | 
						|
    "`3_non-deterministic summary` - three different example summaries that change from run to run (`analyse_image`)."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {
 | 
						|
    "tags": []
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
 | 
						|
    "analysis_explorer.run_server(port=8055)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Generate answers to free-form natural-language questions about images"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Set the list of questions as a list of strings:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "list_of_questions = [\n",
 | 
						|
    "    \"How many persons on the picture?\",\n",
 | 
						|
    "    \"Are there any politicians in the picture?\",\n",
 | 
						|
    "    \"Does the picture show something from medicine?\",\n",
 | 
						|
    "]"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Explore the analysis using the interface:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"summary\")\n",
 | 
						|
    "analysis_explorer.run_server(port=8055)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Or directly analyze for further processing\n",
 | 
						|
    "Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "for key in mydict:\n",
 | 
						|
    "    mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(list_of_questions)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Convert to dataframe and write csv\n",
 | 
						|
    "These steps are required to convert the dictionary of dictionaries into a dictionary with lists that can be converted into a pandas dataframe and exported to a csv file."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "outdict2 = mutils.append_data_to_dict(mydict)\n",
 | 
						|
    "df2 = mutils.dump_df(outdict2)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "df2.head(10)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "df2.to_csv(\"data_out2.csv\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": []
 | 
						|
  }
 | 
						|
 ],
 | 
						|
 "metadata": {
 | 
						|
  "kernelspec": {
 | 
						|
   "display_name": "Python 3 (ipykernel)",
 | 
						|
   "language": "python",
 | 
						|
   "name": "python3"
 | 
						|
  },
 | 
						|
  "language_info": {
 | 
						|
   "codemirror_mode": {
 | 
						|
    "name": "ipython",
 | 
						|
    "version": 3
 | 
						|
   },
 | 
						|
   "file_extension": ".py",
 | 
						|
   "mimetype": "text/x-python",
 | 
						|
   "name": "python",
 | 
						|
   "nbconvert_exporter": "python",
 | 
						|
   "pygments_lexer": "ipython3",
 | 
						|
   "version": "3.9.16"
 | 
						|
  },
 | 
						|
  "vscode": {
 | 
						|
   "interpreter": {
 | 
						|
    "hash": "f1142466f556ab37fe2d38e2897a16796906208adb09fea90ba58bdf8a56f0ba"
 | 
						|
   }
 | 
						|
  }
 | 
						|
 },
 | 
						|
 "nbformat": 4,
 | 
						|
 "nbformat_minor": 4
 | 
						|
}
 |