{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Image summary and visual question answering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on Image Captioning and Visual question answering with lavis. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `ammico` package that is imported here:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:07.559673Z",
"iopub.status.busy": "2023-05-17T07:57:07.559387Z",
"iopub.status.idle": "2023-05-17T07:57:19.981365Z",
"shell.execute_reply": "2023-05-17T07:57:19.980569Z"
},
"tags": []
},
"outputs": [],
"source": [
"from ammico import utils as mutils\n",
"from ammico import display as mdisplay\n",
"import ammico.summary as sm"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set an image path as input file path."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:19.985929Z",
"iopub.status.busy": "2023-05-17T07:57:19.984966Z",
"iopub.status.idle": "2023-05-17T07:57:19.991128Z",
"shell.execute_reply": "2023-05-17T07:57:19.990348Z"
},
"tags": []
},
"outputs": [],
"source": [
"images = mutils.find_files(\n",
" path=\"data/\",\n",
" limit=10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:19.994576Z",
"iopub.status.busy": "2023-05-17T07:57:19.994066Z",
"iopub.status.idle": "2023-05-17T07:57:19.997953Z",
"shell.execute_reply": "2023-05-17T07:57:19.997125Z"
},
"tags": []
},
"outputs": [],
"source": [
"mydict = mutils.initialize_dict(images)"
]
},
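{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick check, you can print the initialized dictionary to see which entries will be analyzed:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick check: show the nested dictionary created from the image files\n",
"print(mydict)"
]
},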
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create captions for images and directly write to csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here you can choose between two models: \"base\" or \"large\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:20.001544Z",
"iopub.status.busy": "2023-05-17T07:57:20.001252Z",
"iopub.status.idle": "2023-05-17T07:57:34.285374Z",
"shell.execute_reply": "2023-05-17T07:57:34.284156Z"
},
"tags": []
},
"outputs": [
{
"ename": "OSError",
"evalue": "Can't load tokenizer for 'bert-base-uncased'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'bert-base-uncased' is the correct path to a directory containing all relevant files for a BertTokenizer tokenizer.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m obj \u001b[38;5;241m=\u001b[39m sm\u001b[38;5;241m.\u001b[39mSummaryDetector(mydict)\n\u001b[0;32m----> 2\u001b[0m summary_model, summary_vis_processors \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbase\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# summary_model, summary_vis_processors = obj.load_model(\"large\")\u001b[39;00m\n",
"File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/summary.py:35\u001b[0m, in \u001b[0;36mSummaryDetector.load_model\u001b[0;34m(self, model_type)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m, model_type):\n\u001b[1;32m 31\u001b[0m select_model \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 32\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase\u001b[39m\u001b[38;5;124m\"\u001b[39m: SummaryDetector\u001b[38;5;241m.\u001b[39mload_model_base,\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlarge\u001b[39m\u001b[38;5;124m\"\u001b[39m: SummaryDetector\u001b[38;5;241m.\u001b[39mload_model_large,\n\u001b[1;32m 34\u001b[0m }\n\u001b[0;32m---> 35\u001b[0m summary_model, summary_vis_processors \u001b[38;5;241m=\u001b[39m \u001b[43mselect_model\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m summary_model, summary_vis_processors\n",
"File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/summary.py:13\u001b[0m, in \u001b[0;36mSummaryDetector.load_model_base\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model_base\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m---> 13\u001b[0m summary_model, summary_vis_processors, _ \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_and_preprocess\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mblip_caption\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbase_coco\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msummary_device\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m summary_model, summary_vis_processors\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/__init__.py:195\u001b[0m, in \u001b[0;36mload_model_and_preprocess\u001b[0;34m(name, model_type, is_eval, device)\u001b[0m\n\u001b[1;32m 192\u001b[0m model_cls \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_model_class(name)\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# load model\u001b[39;00m\n\u001b[0;32m--> 195\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_eval:\n\u001b[1;32m 198\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/base_model.py:70\u001b[0m, in \u001b[0;36mBaseModel.from_pretrained\u001b[0;34m(cls, model_type)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;124;03mBuild a pretrained model from default configuration file, specified by model_type.\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m - model (nn.Module): pretrained or finetuned model, depending on the configuration.\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m model_cfg \u001b[38;5;241m=\u001b[39m OmegaConf\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_config_path(model_type))\u001b[38;5;241m.\u001b[39mmodel\n\u001b[0;32m---> 70\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_cfg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip_caption.py:216\u001b[0m, in \u001b[0;36mBlipCaption.from_config\u001b[0;34m(cls, cfg)\u001b[0m\n\u001b[1;32m 213\u001b[0m prompt \u001b[38;5;241m=\u001b[39m cfg\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 214\u001b[0m max_txt_len \u001b[38;5;241m=\u001b[39m cfg\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_txt_len\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m40\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mimage_encoder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext_decoder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_txt_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_txt_len\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m model\u001b[38;5;241m.\u001b[39mload_checkpoint_from_config(cfg)\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip_caption.py:43\u001b[0m, in \u001b[0;36mBlipCaption.__init__\u001b[0;34m(self, image_encoder, text_decoder, prompt, max_txt_len)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, image_encoder, text_decoder, prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, max_txt_len\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m40\u001b[39m):\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m()\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvisual_encoder \u001b[38;5;241m=\u001b[39m image_encoder\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext_decoder \u001b[38;5;241m=\u001b[39m text_decoder\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip.py:22\u001b[0m, in \u001b[0;36mBlipBase.init_tokenizer\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minit_tokenizer\u001b[39m(\u001b[38;5;28mcls\u001b[39m):\n\u001b[0;32m---> 22\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mBertTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbert-base-uncased\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39madd_special_tokens({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbos_token\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[DEC]\u001b[39m\u001b[38;5;124m\"\u001b[39m})\n\u001b[1;32m 24\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39madd_special_tokens({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madditional_special_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[ENC]\u001b[39m\u001b[38;5;124m\"\u001b[39m]})\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:1788\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 1782\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 1783\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load following files from cache: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munresolved_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and cannot check if these \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1784\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiles are necessary for the tokenizer to operate.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1785\u001b[0m )\n\u001b[1;32m 1787\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[0;32m-> 1788\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 1789\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1791\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1792\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1793\u001b[0m )\n\u001b[1;32m 1795\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 1796\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n",
"\u001b[0;31mOSError\u001b[0m: Can't load tokenizer for 'bert-base-uncased'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'bert-base-uncased' is the correct path to a directory containing all relevant files for a BertTokenizer tokenizer."
]
}
],
"source": [
"obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
"# summary_model, summary_vis_processors = obj.load_model(\"large\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.289426Z",
"iopub.status.busy": "2023-05-17T07:57:34.288864Z",
"iopub.status.idle": "2023-05-17T07:57:34.334585Z",
"shell.execute_reply": "2023-05-17T07:57:34.333821Z"
},
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'summary_model' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m mydict:\n\u001b[1;32m 2\u001b[0m mydict[key] \u001b[38;5;241m=\u001b[39m sm\u001b[38;5;241m.\u001b[39mSummaryDetector(mydict[key])\u001b[38;5;241m.\u001b[39manalyse_image(\n\u001b[0;32m----> 3\u001b[0m \u001b[43msummary_model\u001b[49m, summary_vis_processors\n\u001b[1;32m 4\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'summary_model' is not defined"
]
}
],
"source": [
"for key in mydict:\n",
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
" summary_model, summary_vis_processors\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": []
},
"source": [
"Convert the dictionary of dictionaries into a dictionary with lists:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.342485Z",
"iopub.status.busy": "2023-05-17T07:57:34.337901Z",
"iopub.status.idle": "2023-05-17T07:57:34.351702Z",
"shell.execute_reply": "2023-05-17T07:57:34.350828Z"
},
"tags": []
},
"outputs": [],
"source": [
"outdict = mutils.append_data_to_dict(mydict)\n",
"df = mutils.dump_df(outdict)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Check the dataframe:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.355521Z",
"iopub.status.busy": "2023-05-17T07:57:34.355242Z",
"iopub.status.idle": "2023-05-17T07:57:34.366774Z",
"shell.execute_reply": "2023-05-17T07:57:34.365973Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>data/102730_eng.png</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>data/102141_2_eng.png</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>data/106349S_por.png</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filename\n",
"0 data/102730_eng.png\n",
"1 data/102141_2_eng.png\n",
"2 data/106349S_por.png"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write the csv file:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.370375Z",
"iopub.status.busy": "2023-05-17T07:57:34.369877Z",
"iopub.status.idle": "2023-05-17T07:57:34.375313Z",
"shell.execute_reply": "2023-05-17T07:57:34.374633Z"
},
"tags": []
},
"outputs": [],
"source": [
"df.to_csv(\"./data_out.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manually inspect the summaries\n",
"\n",
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing.\n",
"\n",
"`const_image_summary` - the permanent summarys, which does not change from run to run (analyse_image).\n",
"\n",
"`3_non-deterministic summary` - 3 different summarys examples that change from run to run (analyse_image). "
]
},
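{
"cell_type": "markdown",
"metadata": {},
"source": [
"For a non-interactive check, here is a minimal sketch that prints both captions per image (assuming `analyse_image` has populated the keys described above; using `.get` avoids a `KeyError` if it has not):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print the constant and the non-deterministic captions per image.\n",
"# Assumes analyse_image has already run and filled these keys.\n",
"for key in mydict:\n",
"    print(key)\n",
"    print(\"  const_image_summary:\", mydict[key].get(\"const_image_summary\"))\n",
"    print(\"  3_non-deterministic summary:\", mydict[key].get(\"3_non-deterministic summary\"))"
]
},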
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.378686Z",
"iopub.status.busy": "2023-05-17T07:57:34.378174Z",
"iopub.status.idle": "2023-05-17T07:57:34.417753Z",
"shell.execute_reply": "2023-05-17T07:57:34.416811Z"
},
"tags": []
},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'ammico.display' has no attribute 'explore_analysis'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmdisplay\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexplore_analysis\u001b[49m(mydict, identify\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'ammico.display' has no attribute 'explore_analysis'"
]
}
],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate answers to free-form questions about images written in natural language. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set the list of questions"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.421632Z",
"iopub.status.busy": "2023-05-17T07:57:34.421120Z",
"iopub.status.idle": "2023-05-17T07:57:34.424792Z",
"shell.execute_reply": "2023-05-17T07:57:34.424044Z"
}
},
"outputs": [],
"source": [
"list_of_questions = [\n",
" \"How many persons on the picture?\",\n",
" \"Are there any politicians in the picture?\",\n",
" \"Does the picture show something from medicine?\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:57:34.428232Z",
"iopub.status.busy": "2023-05-17T07:57:34.427624Z",
"iopub.status.idle": "2023-05-17T07:58:22.391001Z",
"shell.execute_reply": "2023-05-17T07:58:22.389910Z"
}
},
"outputs": [
{
"ename": "OSError",
"evalue": "Can't load tokenizer for 'bert-base-uncased'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'bert-base-uncased' is the correct path to a directory containing all relevant files for a BertTokenizer tokenizer.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m mydict:\n\u001b[0;32m----> 2\u001b[0m mydict[key] \u001b[38;5;241m=\u001b[39m \u001b[43msm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSummaryDetector\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmydict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manalyse_questions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlist_of_questions\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/summary.py:63\u001b[0m, in \u001b[0;36mSummaryDetector.analyse_questions\u001b[0;34m(self, list_of_questions)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manalyse_questions\u001b[39m(\u001b[38;5;28mself\u001b[39m, list_of_questions):\n\u001b[1;32m 59\u001b[0m (\n\u001b[1;32m 60\u001b[0m summary_vqa_model,\n\u001b[1;32m 61\u001b[0m summary_vqa_vis_processors,\n\u001b[1;32m 62\u001b[0m summary_vqa_txt_processors,\n\u001b[0;32m---> 63\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_and_preprocess\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mblip_vqa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvqav2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 67\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msummary_device\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 68\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(list_of_questions) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 70\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubdict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfilename\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/__init__.py:195\u001b[0m, in \u001b[0;36mload_model_and_preprocess\u001b[0;34m(name, model_type, is_eval, device)\u001b[0m\n\u001b[1;32m 192\u001b[0m model_cls \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_model_class(name)\n\u001b[1;32m 194\u001b[0m \u001b[38;5;66;03m# load model\u001b[39;00m\n\u001b[0;32m--> 195\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_eval:\n\u001b[1;32m 198\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/base_model.py:70\u001b[0m, in \u001b[0;36mBaseModel.from_pretrained\u001b[0;34m(cls, model_type)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;124;03mBuild a pretrained model from default configuration file, specified by model_type.\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m - model (nn.Module): pretrained or finetuned model, depending on the configuration.\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m model_cfg \u001b[38;5;241m=\u001b[39m OmegaConf\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_config_path(model_type))\u001b[38;5;241m.\u001b[39mmodel\n\u001b[0;32m---> 70\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_cfg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip_vqa.py:366\u001b[0m, in \u001b[0;36mBlipVQA.from_config\u001b[0;34m(cls, cfg)\u001b[0m\n\u001b[1;32m 362\u001b[0m text_decoder \u001b[38;5;241m=\u001b[39m XBertLMHeadDecoder\u001b[38;5;241m.\u001b[39mfrom_config(cfg)\n\u001b[1;32m 364\u001b[0m max_txt_len \u001b[38;5;241m=\u001b[39m cfg\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_txt_len\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m35\u001b[39m)\n\u001b[0;32m--> 366\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 367\u001b[0m \u001b[43m \u001b[49m\u001b[43mimage_encoder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mimage_encoder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[43mtext_encoder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext_encoder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m \u001b[49m\u001b[43mtext_decoder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext_decoder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_txt_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_txt_len\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 373\u001b[0m model\u001b[38;5;241m.\u001b[39mload_checkpoint_from_config(cfg)\n\u001b[1;32m 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip_vqa.py:45\u001b[0m, in \u001b[0;36mBlipVQA.__init__\u001b[0;34m(self, image_encoder, text_encoder, text_decoder, max_txt_len)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, image_encoder, text_encoder, text_decoder, max_txt_len\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m35\u001b[39m):\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m()\n\u001b[0;32m---> 45\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvisual_encoder \u001b[38;5;241m=\u001b[39m image_encoder\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext_encoder \u001b[38;5;241m=\u001b[39m text_encoder\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/lavis/models/blip_models/blip.py:22\u001b[0m, in \u001b[0;36mBlipBase.init_tokenizer\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minit_tokenizer\u001b[39m(\u001b[38;5;28mcls\u001b[39m):\n\u001b[0;32m---> 22\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mBertTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbert-base-uncased\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39madd_special_tokens({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbos_token\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[DEC]\u001b[39m\u001b[38;5;124m\"\u001b[39m})\n\u001b[1;32m 24\u001b[0m tokenizer\u001b[38;5;241m.\u001b[39madd_special_tokens({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124madditional_special_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[ENC]\u001b[39m\u001b[38;5;124m\"\u001b[39m]})\n",
"File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.16/x64/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:1788\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)\u001b[0m\n\u001b[1;32m 1782\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 1783\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load following files from cache: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00munresolved_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and cannot check if these \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1784\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfiles are necessary for the tokenizer to operate.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1785\u001b[0m )\n\u001b[1;32m 1787\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(full_file_name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mfor\u001b[39;00m full_file_name \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[0;32m-> 1788\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 1789\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt load tokenizer for \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. If you were trying to load it from \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1790\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, make sure you don\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt have a local directory with the same name. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1791\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOtherwise, make sure \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is the correct path to a directory \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1792\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontaining all relevant files for a \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m tokenizer.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1793\u001b[0m )\n\u001b[1;32m 1795\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_id, file_path \u001b[38;5;129;01min\u001b[39;00m vocab_files\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 1796\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file_id \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m resolved_vocab_files:\n",
"\u001b[0;31mOSError\u001b[0m: Can't load tokenizer for 'bert-base-uncased'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'bert-base-uncased' is the correct path to a directory containing all relevant files for a BertTokenizer tokenizer."
]
}
],
"source": [
"for key in mydict:\n",
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(list_of_questions)"
]
},
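{
"cell_type": "markdown",
"metadata": {},
"source": [
"To inspect the answers without the interactive display, here is a minimal sketch that prints them directly (this assumes `analyse_questions` stores each answer under the question text as key; `.get` keeps the loop safe if the key format differs):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print the answer to each question for every image.\n",
"# Assumption: analyse_questions stores each answer under the question string.\n",
"for key in mydict:\n",
"    print(key)\n",
"    for question in list_of_questions:\n",
"        print(\"  \", question, \"->\", mydict[key].get(question))"
]
},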
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:58:22.397270Z",
"iopub.status.busy": "2023-05-17T07:58:22.396986Z",
"iopub.status.idle": "2023-05-17T07:58:22.440375Z",
"shell.execute_reply": "2023-05-17T07:58:22.439514Z"
}
},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'ammico.display' has no attribute 'explore_analysis'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmdisplay\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexplore_analysis\u001b[49m(mydict, identify\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: module 'ammico.display' has no attribute 'explore_analysis'"
]
}
],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the dictionary of dictionarys into a dictionary with lists:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:58:22.444018Z",
"iopub.status.busy": "2023-05-17T07:58:22.443467Z",
"iopub.status.idle": "2023-05-17T07:58:22.447814Z",
"shell.execute_reply": "2023-05-17T07:58:22.447057Z"
}
},
"outputs": [],
"source": [
"outdict2 = mutils.append_data_to_dict(mydict)\n",
"df2 = mutils.dump_df(outdict2)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:58:22.451680Z",
"iopub.status.busy": "2023-05-17T07:58:22.451163Z",
"iopub.status.idle": "2023-05-17T07:58:22.458395Z",
"shell.execute_reply": "2023-05-17T07:58:22.457632Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>data/102730_eng.png</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>data/102141_2_eng.png</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>data/106349S_por.png</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filename\n",
"0 data/102730_eng.png\n",
"1 data/102141_2_eng.png\n",
"2 data/106349S_por.png"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-17T07:58:22.462659Z",
"iopub.status.busy": "2023-05-17T07:58:22.462233Z",
"iopub.status.idle": "2023-05-17T07:58:22.466883Z",
"shell.execute_reply": "2023-05-17T07:58:22.466222Z"
}
},
"outputs": [],
"source": [
"df2.to_csv(\"./data_out2.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"vscode": {
"interpreter": {
"hash": "f1142466f556ab37fe2d38e2897a16796906208adb09fea90ba58bdf8a56f0ba"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}