зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-11-04 07:56:04 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			2979 строки
		
	
	
		
			153 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			2979 строки
		
	
	
		
			153 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						|
 "cells": [
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "dcaa3da1",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "# Notebook for text extraction on image\n",
 | 
						|
    "\n",
 | 
						|
    "The text extraction and analysis is carried out using a variety of tools:  \n",
 | 
						|
    "\n",
 | 
						|
    "1. Text extraction from the image using [google-cloud-vision](https://cloud.google.com/vision)  \n",
 | 
						|
    "1. Language detection of the extracted text using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/)  \n",
 | 
						|
    "1. Translation into English or other languages using [Googletrans](https://py-googletrans.readthedocs.io/en/latest/) \n",
 | 
						|
    "1. Cleaning of the text using [spacy](https://spacy.io/) \n",
 | 
						|
    "1. Spell-check using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
 | 
						|
    "1. Subjectivity analysis using [TextBlob](https://textblob.readthedocs.io/en/dev/index.html) \n",
 | 
						|
    "1. Text summarization using [transformers](https://huggingface.co/docs/transformers/index) pipelines\n",
 | 
						|
    "1. Sentiment analysis using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
 | 
						|
    "1. Named entity recognition using [transformers](https://huggingface.co/docs/transformers/index) pipelines \n",
 | 
						|
    "1. Topic analysis using [BERTopic](https://github.com/MaartenGr/BERTopic) \n",
 | 
						|
    "\n",
 | 
						|
    "The first cell is only run on google colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
 | 
						|
    "\n",
 | 
						|
    "After that, we can import `ammico` and read in the files given a folder path."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 1,
 | 
						|
   "id": "f43f327c",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:00.786489Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:00.785951Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:23:00.794750Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:23:00.794192Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "# if running on google colab\n",
 | 
						|
    "# flake8-noqa-cell\n",
 | 
						|
    "import os\n",
 | 
						|
    "\n",
 | 
						|
    "if \"google.colab\" in str(get_ipython()):\n",
 | 
						|
    "    # update python version\n",
 | 
						|
    "    # install setuptools\n",
 | 
						|
    "    # %pip install setuptools==61 -qqq\n",
 | 
						|
    "    # install ammico\n",
 | 
						|
    "    %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
 | 
						|
    "    # mount google drive for data and API key\n",
 | 
						|
    "    from google.colab import drive\n",
 | 
						|
    "\n",
 | 
						|
    "    drive.mount(\"/content/drive\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 2,
 | 
						|
   "id": "cf362e60",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:00.797570Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:00.797005Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:23:14.434068Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:23:14.433419Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "import os\n",
 | 
						|
    "import ammico\n",
 | 
						|
    "from ammico import utils as mutils\n",
 | 
						|
    "from ammico import display as mdisplay"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "fddba721",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "We select a subset of image files to try the text extraction on, see the `limit` keyword. The `find_files` function finds image files within a given directory: "
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 3,
 | 
						|
   "id": "27675810",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:14.437891Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:14.437175Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:23:14.441193Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:23:14.440570Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "# Here you need to provide the path to your google drive folder\n",
 | 
						|
    "# or local folder containing the images\n",
 | 
						|
    "images = mutils.find_files(\n",
 | 
						|
    "    path=\"data/\",\n",
 | 
						|
    "    limit=10,\n",
 | 
						|
    ")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "3a7dfe11",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 4,
 | 
						|
   "id": "8b32409f",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:14.443971Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:14.443642Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:23:14.446733Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:23:14.446119Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "mydict = mutils.initialize_dict(images)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "7b8b929f",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Google cloud vision API\n",
 | 
						|
    "\n",
 | 
						|
    "For this you need an API key and have the app activated in your google console. The first 1000 images per month are free (July 2022)."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "cbf74c0b-52fe-4fb8-b617-f18611e8f986",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "```\n",
 | 
						|
    "os.environ[\n",
 | 
						|
    "    \"GOOGLE_APPLICATION_CREDENTIALS\"\n",
 | 
						|
    "] = \"your-credentials.json\"\n",
 | 
						|
    "```"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "0891b795-c7fe-454c-a45d-45fadf788142",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Inspect the elements per image\n",
 | 
						|
    "To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
 | 
						|
    "Here, we display the text extraction and translation results provided by the above libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 5,
 | 
						|
   "id": "7c6ecc88",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:14.449813Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:14.449608Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:23:15.481829Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:23:15.481213Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "ename": "TypeError",
 | 
						|
     "evalue": "__init__() got an unexpected keyword argument 'identify'",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m analysis_explorer \u001b[38;5;241m=\u001b[39m \u001b[43mmdisplay\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mAnalysisExplorer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmydict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midentify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtext-on-image\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m analysis_explorer\u001b[38;5;241m.\u001b[39mrun_server(port\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8054\u001b[39m)\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'identify'"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"text-on-image\")\n",
 | 
						|
    "analysis_explorer.run_server(port=8054)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "9c3e72b5-0e57-4019-b45e-3e36a74e7f52",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Or directly analyze for further processing\n",
 | 
						|
    "Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded. Set the keyword `analyse_text` to `True` if you want the text to be analyzed (spell check, subjectivity, text summary, sentiment, NER)."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 6,
 | 
						|
   "id": "365c78b1-7ff4-4213-86fa-6a0a2d05198f",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:23:15.485576Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:23:15.485122Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:30.756386Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:30.748510Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/a4f8f3e/config.json: 0.00B [00:00, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/a4f8f3e/config.json: 1.80kB [00:00, 6.30MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   1%|          | 10.5M/1.22G [00:00<00:13, 86.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   2%|▏         | 21.0M/1.22G [00:00<00:12, 94.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   3%|▎         | 31.5M/1.22G [00:00<00:12, 97.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   4%|▍         | 52.4M/1.22G [00:00<00:11, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   5%|▌         | 62.9M/1.22G [00:00<00:11, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   6%|▌         | 73.4M/1.22G [00:00<00:11, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   7%|▋         | 83.9M/1.22G [00:00<00:11, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   8%|▊         | 94.4M/1.22G [00:00<00:11, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   9%|▊         | 105M/1.22G [00:01<00:11, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   9%|▉         | 115M/1.22G [00:01<00:11, 99.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  10%|█         | 126M/1.22G [00:01<00:10, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  11%|█         | 136M/1.22G [00:01<00:10, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  12%|█▏        | 147M/1.22G [00:01<00:10, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  13%|█▎        | 157M/1.22G [00:01<00:10, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  14%|█▎        | 168M/1.22G [00:01<00:10, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  15%|█▍        | 178M/1.22G [00:01<00:10, 99.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  15%|█▌        | 189M/1.22G [00:01<00:10, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  16%|█▋        | 199M/1.22G [00:01<00:10, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  17%|█▋        | 210M/1.22G [00:02<00:10, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  18%|█▊        | 220M/1.22G [00:02<00:09, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  19%|█▉        | 231M/1.22G [00:02<00:09, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  20%|█▉        | 241M/1.22G [00:02<00:09, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  21%|██        | 252M/1.22G [00:02<00:09, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  21%|██▏       | 262M/1.22G [00:02<00:09, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  23%|██▎       | 283M/1.22G [00:02<00:09, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  24%|██▍       | 294M/1.22G [00:02<00:09, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  25%|██▍       | 304M/1.22G [00:03<00:09, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  26%|██▌       | 315M/1.22G [00:03<00:09, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  27%|██▋       | 325M/1.22G [00:03<00:08, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  27%|██▋       | 336M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  28%|██▊       | 346M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  29%|██▉       | 357M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  30%|███       | 367M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  31%|███       | 377M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  32%|███▏      | 388M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  33%|███▎      | 398M/1.22G [00:03<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  33%|███▎      | 409M/1.22G [00:04<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  34%|███▍      | 419M/1.22G [00:04<00:08, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  35%|███▌      | 430M/1.22G [00:04<00:07, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  36%|███▌      | 440M/1.22G [00:04<00:07, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  37%|███▋      | 451M/1.22G [00:04<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  38%|███▊      | 461M/1.22G [00:04<00:07, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  39%|███▊      | 472M/1.22G [00:04<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  39%|███▉      | 482M/1.22G [00:04<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  40%|████      | 493M/1.22G [00:04<00:07, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  41%|████      | 503M/1.22G [00:04<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  42%|████▏     | 514M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  43%|████▎     | 524M/1.22G [00:05<00:06, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  44%|████▍     | 535M/1.22G [00:05<00:06, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  45%|████▍     | 545M/1.22G [00:05<00:06, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  45%|████▌     | 556M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  46%|████▋     | 566M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  47%|████▋     | 577M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  48%|████▊     | 587M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  49%|████▉     | 598M/1.22G [00:05<00:06, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  50%|████▉     | 608M/1.22G [00:06<00:06, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  51%|█████     | 619M/1.22G [00:06<00:05, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  51%|█████▏    | 629M/1.22G [00:06<00:05, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  52%|█████▏    | 640M/1.22G [00:06<00:05, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  53%|█████▎    | 650M/1.22G [00:06<00:05, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  54%|█████▍    | 661M/1.22G [00:06<00:05, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  55%|█████▍    | 671M/1.22G [00:06<00:05, 99.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  56%|█████▌    | 682M/1.22G [00:06<00:05, 97.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  57%|█████▋    | 692M/1.22G [00:06<00:05, 98.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  57%|█████▋    | 703M/1.22G [00:06<00:05, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  58%|█████▊    | 713M/1.22G [00:07<00:05, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  59%|█████▉    | 724M/1.22G [00:07<00:04, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  60%|██████    | 734M/1.22G [00:07<00:04, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  61%|██████    | 744M/1.22G [00:07<00:04, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  62%|██████▏   | 755M/1.22G [00:07<00:04, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  63%|██████▎   | 765M/1.22G [00:07<00:05, 84.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  64%|██████▍   | 786M/1.22G [00:07<00:04, 104MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  66%|██████▌   | 807M/1.22G [00:08<00:04, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  67%|██████▋   | 818M/1.22G [00:08<00:05, 68.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  68%|██████▊   | 828M/1.22G [00:08<00:05, 74.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  69%|██████▉   | 849M/1.22G [00:08<00:03, 98.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  71%|███████   | 870M/1.22G [00:08<00:03, 98.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  73%|███████▎  | 891M/1.22G [00:09<00:04, 82.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  75%|███████▍  | 912M/1.22G [00:09<00:03, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  76%|███████▋  | 933M/1.22G [00:09<00:02, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  78%|███████▊  | 954M/1.22G [00:10<00:04, 53.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  80%|███████▉  | 975M/1.22G [00:10<00:03, 66.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  81%|████████  | 986M/1.22G [00:10<00:03, 68.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  82%|████████▏ | 1.01G/1.22G [00:10<00:02, 84.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  84%|████████▍ | 1.03G/1.22G [00:10<00:02, 89.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  86%|████████▌ | 1.05G/1.22G [00:11<00:01, 93.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  88%|████████▊ | 1.07G/1.22G [00:11<00:01, 96.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  89%|████████▉ | 1.09G/1.22G [00:11<00:01, 96.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  90%|█████████ | 1.10G/1.22G [00:11<00:01, 97.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  91%|█████████ | 1.11G/1.22G [00:11<00:01, 98.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  92%|█████████▏| 1.12G/1.22G [00:11<00:01, 99.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  93%|█████████▎| 1.13G/1.22G [00:11<00:00, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  94%|█████████▎| 1.14G/1.22G [00:11<00:00, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  94%|█████████▍| 1.15G/1.22G [00:12<00:00, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  95%|█████████▌| 1.16G/1.22G [00:12<00:00, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  96%|█████████▌| 1.17G/1.22G [00:12<00:00, 96.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  97%|█████████▋| 1.18G/1.22G [00:12<00:00, 98.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  98%|█████████▊| 1.20G/1.22G [00:12<00:00, 99.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  99%|█████████▊| 1.21G/1.22G [00:12<00:00, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin: 100%|█████████▉| 1.22G/1.22G [00:12<00:00, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin: 100%|██████████| 1.22G/1.22G [00:12<00:00, 95.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json: 100%|██████████| 26.0/26.0 [00:00<00:00, 25.8kB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)e/a4f8f3e/vocab.json: 0.00B [00:00, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)e/a4f8f3e/vocab.json: 899kB [00:00, 11.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)e/a4f8f3e/merges.txt: 0.00B [00:00, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)e/a4f8f3e/merges.txt: 456kB [00:00, 8.34MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/af0f99b/config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/af0f99b/config.json: 100%|██████████| 629/629 [00:00<00:00, 615kB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   4%|▍         | 10.5M/268M [00:00<00:04, 53.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   8%|▊         | 21.0M/268M [00:00<00:03, 74.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  12%|█▏        | 31.5M/268M [00:00<00:02, 81.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  16%|█▌        | 41.9M/268M [00:00<00:02, 86.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  20%|█▉        | 52.4M/268M [00:00<00:02, 90.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  23%|██▎       | 62.9M/268M [00:00<00:02, 94.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  27%|██▋       | 73.4M/268M [00:00<00:02, 96.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  31%|███▏      | 83.9M/268M [00:00<00:01, 95.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  35%|███▌      | 94.4M/268M [00:01<00:03, 56.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  43%|████▎     | 115M/268M [00:01<00:02, 71.0MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  47%|████▋     | 126M/268M [00:01<00:01, 76.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  51%|█████     | 136M/268M [00:01<00:01, 82.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  55%|█████▍    | 147M/268M [00:01<00:01, 87.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  59%|█████▊    | 157M/268M [00:01<00:01, 91.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  63%|██████▎   | 168M/268M [00:02<00:01, 94.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  67%|██████▋   | 178M/268M [00:02<00:01, 68.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  70%|███████   | 189M/268M [00:02<00:01, 52.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  82%|████████▏ | 220M/268M [00:02<00:00, 86.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  90%|█████████ | 241M/268M [00:02<00:00, 91.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  94%|█████████▍| 252M/268M [00:03<00:00, 92.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  98%|█████████▊| 262M/268M [00:03<00:00, 94.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin: 100%|██████████| 268M/268M [00:03<00:00, 82.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<00:00, 46.2kB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)ve/af0f99b/vocab.txt: 0.00B [00:00, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)ve/af0f99b/vocab.txt: 232kB [00:00, 6.12MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/f2482bf/config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)/f2482bf/config.json: 100%|██████████| 998/998 [00:00<00:00, 960kB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   1%|          | 10.5M/1.33G [00:00<00:12, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   2%|▏         | 21.0M/1.33G [00:00<00:12, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   2%|▏         | 31.5M/1.33G [00:00<00:12, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   3%|▎         | 41.9M/1.33G [00:00<00:12, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   4%|▍         | 52.4M/1.33G [00:00<00:12, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   5%|▍         | 62.9M/1.33G [00:00<00:12, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   6%|▌         | 73.4M/1.33G [00:00<00:12, 98.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   6%|▋         | 83.9M/1.33G [00:00<00:12, 97.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   7%|▋         | 94.4M/1.33G [00:00<00:12, 97.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   8%|▊         | 105M/1.33G [00:01<00:12, 99.2MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   9%|▊         | 115M/1.33G [00:01<00:12, 99.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:   9%|▉         | 126M/1.33G [00:01<00:12, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  10%|█         | 136M/1.33G [00:01<00:12, 99.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  11%|█         | 147M/1.33G [00:01<00:11, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  12%|█▏        | 157M/1.33G [00:01<00:11, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  13%|█▎        | 168M/1.33G [00:01<00:11, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  13%|█▎        | 178M/1.33G [00:01<00:11, 99.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  14%|█▍        | 189M/1.33G [00:01<00:11, 99.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  15%|█▍        | 199M/1.33G [00:01<00:11, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  16%|█▌        | 210M/1.33G [00:02<00:11, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  17%|█▋        | 220M/1.33G [00:02<00:11, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  17%|█▋        | 231M/1.33G [00:02<00:10, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  18%|█▊        | 241M/1.33G [00:02<00:11, 95.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  19%|█▉        | 252M/1.33G [00:02<00:11, 97.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  20%|█▉        | 262M/1.33G [00:03<00:24, 43.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  20%|██        | 273M/1.33G [00:03<00:20, 52.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  22%|██▏       | 294M/1.33G [00:03<00:13, 77.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  24%|██▎       | 315M/1.33G [00:03<00:11, 89.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  25%|██▌       | 336M/1.33G [00:03<00:11, 89.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  27%|██▋       | 357M/1.33G [00:03<00:10, 93.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  28%|██▊       | 377M/1.33G [00:04<00:09, 96.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  29%|██▉       | 388M/1.33G [00:04<00:09, 97.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  30%|██▉       | 398M/1.33G [00:04<00:09, 97.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  31%|███       | 409M/1.33G [00:04<00:09, 98.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  31%|███▏      | 419M/1.33G [00:04<00:09, 99.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  32%|███▏      | 430M/1.33G [00:04<00:08, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  33%|███▎      | 440M/1.33G [00:04<00:08, 99.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  35%|███▍      | 461M/1.33G [00:04<00:08, 102MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  35%|███▌      | 472M/1.33G [00:05<00:08, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  36%|███▌      | 482M/1.33G [00:05<00:08, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  37%|███▋      | 493M/1.33G [00:05<00:08, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  38%|███▊      | 503M/1.33G [00:05<00:08, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  39%|███▊      | 514M/1.33G [00:05<00:07, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  39%|███▉      | 524M/1.33G [00:05<00:07, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  41%|████      | 545M/1.33G [00:05<00:07, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  42%|████▏     | 556M/1.33G [00:05<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  42%|████▏     | 566M/1.33G [00:05<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  43%|████▎     | 577M/1.33G [00:06<00:07, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  44%|████▍     | 587M/1.33G [00:06<00:07, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  45%|████▍     | 598M/1.33G [00:06<00:07, 99.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  46%|████▌     | 608M/1.33G [00:06<00:07, 99.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  46%|████▋     | 619M/1.33G [00:06<00:16, 42.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  47%|████▋     | 629M/1.33G [00:07<00:13, 50.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  48%|████▊     | 640M/1.33G [00:07<00:14, 47.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  50%|████▉     | 661M/1.33G [00:07<00:09, 69.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  50%|█████     | 671M/1.33G [00:07<00:08, 76.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  51%|█████     | 682M/1.33G [00:07<00:07, 81.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  52%|█████▏    | 692M/1.33G [00:07<00:07, 86.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  53%|█████▎    | 703M/1.33G [00:07<00:07, 90.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  53%|█████▎    | 713M/1.33G [00:07<00:06, 93.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  54%|█████▍    | 724M/1.33G [00:08<00:06, 95.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  55%|█████▌    | 734M/1.33G [00:08<00:06, 98.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  56%|█████▌    | 744M/1.33G [00:08<00:05, 99.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  57%|█████▋    | 755M/1.33G [00:08<00:05, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  57%|█████▋    | 765M/1.33G [00:08<00:05, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  58%|█████▊    | 776M/1.33G [00:08<00:05, 97.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  59%|█████▉    | 786M/1.33G [00:08<00:05, 99.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  60%|█████▉    | 797M/1.33G [00:08<00:05, 100MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  61%|██████    | 807M/1.33G [00:08<00:05, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  61%|██████▏   | 818M/1.33G [00:09<00:05, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  62%|██████▏   | 828M/1.33G [00:09<00:05, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  63%|██████▎   | 839M/1.33G [00:09<00:04, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  64%|██████▎   | 849M/1.33G [00:09<00:04, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  65%|██████▌   | 870M/1.33G [00:09<00:04, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  66%|██████▌   | 881M/1.33G [00:09<00:04, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  67%|██████▋   | 891M/1.33G [00:09<00:04, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  68%|██████▊   | 912M/1.33G [00:09<00:04, 103MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  69%|██████▉   | 923M/1.33G [00:10<00:04, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  70%|██████▉   | 933M/1.33G [00:10<00:03, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  71%|███████   | 944M/1.33G [00:10<00:03, 102MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  72%|███████▏  | 954M/1.33G [00:10<00:03, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  72%|███████▏  | 965M/1.33G [00:10<00:03, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  73%|███████▎  | 975M/1.33G [00:10<00:03, 97.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  74%|███████▍  | 986M/1.33G [00:10<00:03, 99.1MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  75%|███████▍  | 996M/1.33G [00:10<00:03, 101MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  75%|███████▌  | 1.01G/1.33G [00:10<00:03, 101MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  76%|███████▌  | 1.02G/1.33G [00:11<00:03, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  77%|███████▋  | 1.03G/1.33G [00:11<00:03, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  78%|███████▊  | 1.04G/1.33G [00:11<00:02, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  79%|███████▊  | 1.05G/1.33G [00:11<00:02, 100MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  79%|███████▉  | 1.06G/1.33G [00:11<00:02, 99.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  80%|████████  | 1.07G/1.33G [00:11<00:02, 98.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  81%|████████  | 1.08G/1.33G [00:11<00:02, 87.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  83%|████████▎ | 1.10G/1.33G [00:11<00:02, 85.2MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  84%|████████▍ | 1.12G/1.33G [00:12<00:01, 110MB/s] "
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  86%|████████▌ | 1.14G/1.33G [00:12<00:01, 107MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  87%|████████▋ | 1.16G/1.33G [00:12<00:02, 62.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  89%|████████▉ | 1.18G/1.33G [00:13<00:02, 72.8MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  90%|█████████ | 1.21G/1.33G [00:13<00:02, 57.0MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  92%|█████████▏| 1.23G/1.33G [00:13<00:01, 73.6MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  94%|█████████▎| 1.25G/1.33G [00:13<00:01, 80.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  95%|█████████▌| 1.27G/1.33G [00:14<00:00, 86.4MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  97%|█████████▋| 1.29G/1.33G [00:14<00:00, 79.5MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin:  98%|█████████▊| 1.31G/1.33G [00:14<00:00, 72.3MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin: 100%|██████████| 1.33G/1.33G [00:14<00:00, 91.9MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading pytorch_model.bin: 100%|██████████| 1.33G/1.33G [00:14<00:00, 89.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)okenizer_config.json: 100%|██████████| 60.0/60.0 [00:00<00:00, 58.4kB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)ve/f2482bf/vocab.txt: 0.00B [00:00, ?B/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "Downloading (…)ve/f2482bf/vocab.txt: 213kB [00:00, 12.7MB/s]"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "for key in mydict:\n",
 | 
						|
    "    mydict[key] = ammico.text.TextDetector(\n",
 | 
						|
    "        mydict[key], analyse_text=True\n",
 | 
						|
    "    ).analyse_image()"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "3c063eda",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Convert to dataframe and write csv\n",
 | 
						|
    "These steps are required to convert the dictionary of dictionarys into a dictionary with lists, that can be converted into a pandas dataframe and exported to a csv file."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 7,
 | 
						|
   "id": "5709c2cd",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:30.784996Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:30.784319Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:30.818628Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:30.818032Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "outdict = mutils.append_data_to_dict(mydict)\n",
 | 
						|
    "df = mutils.dump_df(outdict)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "ae182eb7",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Check the dataframe:"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 8,
 | 
						|
   "id": "c4f05637",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:30.821898Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:30.821537Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:30.888205Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:30.887540Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "data": {
 | 
						|
      "text/html": [
 | 
						|
       "<div>\n",
 | 
						|
       "<style scoped>\n",
 | 
						|
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						|
       "        vertical-align: middle;\n",
 | 
						|
       "    }\n",
 | 
						|
       "\n",
 | 
						|
       "    .dataframe tbody tr th {\n",
 | 
						|
       "        vertical-align: top;\n",
 | 
						|
       "    }\n",
 | 
						|
       "\n",
 | 
						|
       "    .dataframe thead th {\n",
 | 
						|
       "        text-align: right;\n",
 | 
						|
       "    }\n",
 | 
						|
       "</style>\n",
 | 
						|
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						|
       "  <thead>\n",
 | 
						|
       "    <tr style=\"text-align: right;\">\n",
 | 
						|
       "      <th></th>\n",
 | 
						|
       "      <th>filename</th>\n",
 | 
						|
       "      <th>text</th>\n",
 | 
						|
       "      <th>text_language</th>\n",
 | 
						|
       "      <th>text_english</th>\n",
 | 
						|
       "      <th>text_summary</th>\n",
 | 
						|
       "      <th>sentiment</th>\n",
 | 
						|
       "      <th>sentiment_score</th>\n",
 | 
						|
       "      <th>entity</th>\n",
 | 
						|
       "      <th>entity_type</th>\n",
 | 
						|
       "    </tr>\n",
 | 
						|
       "  </thead>\n",
 | 
						|
       "  <tbody>\n",
 | 
						|
       "    <tr>\n",
 | 
						|
       "      <th>0</th>\n",
 | 
						|
       "      <td>data/106349S_por.png</td>\n",
 | 
						|
       "      <td>NEWS URGENTE SAMSUNG AO VIVO Rio de Janeiro NO...</td>\n",
 | 
						|
       "      <td>pt</td>\n",
 | 
						|
       "      <td>NEWS URGENT SAMSUNG LIVE Rio de Janeiro NEW CO...</td>\n",
 | 
						|
       "      <td>NEW COUNTING METHOD RJ City HALL EXCLUDES 1,1...</td>\n",
 | 
						|
       "      <td>NEGATIVE</td>\n",
 | 
						|
       "      <td>0.99</td>\n",
 | 
						|
       "      <td>[Rio de Janeiro, C, ##IT, P, ##NA, ##LTO]</td>\n",
 | 
						|
       "      <td>[LOC, ORG, LOC, LOC, ORG, LOC]</td>\n",
 | 
						|
       "    </tr>\n",
 | 
						|
       "    <tr>\n",
 | 
						|
       "      <th>1</th>\n",
 | 
						|
       "      <td>data/102141_2_eng.png</td>\n",
 | 
						|
       "      <td>CORONAVIRUS QUARANTINE CORONAVIRUS OUTBREAK BE...</td>\n",
 | 
						|
       "      <td>en</td>\n",
 | 
						|
       "      <td>CORONAVIRUS QUARANTINE CORONAVIRUS OUTBREAK BE...</td>\n",
 | 
						|
       "      <td>Coronavirus QUARANTINE CORONAVIRUS OUTBREAK</td>\n",
 | 
						|
       "      <td>NEGATIVE</td>\n",
 | 
						|
       "      <td>0.98</td>\n",
 | 
						|
       "      <td>[CORONAVIRUS, ##AR, ##TI, ##RONAVIR, ##C, Co]</td>\n",
 | 
						|
       "      <td>[ORG, MISC, MISC, ORG, MISC, MISC]</td>\n",
 | 
						|
       "    </tr>\n",
 | 
						|
       "    <tr>\n",
 | 
						|
       "      <th>2</th>\n",
 | 
						|
       "      <td>data/102730_eng.png</td>\n",
 | 
						|
       "      <td>400 DEATHS GET E-BOOK X AN Corporation ncy Ser...</td>\n",
 | 
						|
       "      <td>en</td>\n",
 | 
						|
       "      <td>400 DEATHS GET E-BOOK X AN Corporation ncy Ser...</td>\n",
 | 
						|
       "      <td>A municipal worker sprays disinfectant on his...</td>\n",
 | 
						|
       "      <td>NEGATIVE</td>\n",
 | 
						|
       "      <td>0.99</td>\n",
 | 
						|
       "      <td>[AN Corporation ncy Services, Ahmedabad, RE, #...</td>\n",
 | 
						|
       "      <td>[ORG, LOC, PER, ORG]</td>\n",
 | 
						|
       "    </tr>\n",
 | 
						|
       "  </tbody>\n",
 | 
						|
       "</table>\n",
 | 
						|
       "</div>"
 | 
						|
      ],
 | 
						|
      "text/plain": [
 | 
						|
       "                filename                                               text  \\\n",
 | 
						|
       "0   data/106349S_por.png  NEWS URGENTE SAMSUNG AO VIVO Rio de Janeiro NO...   \n",
 | 
						|
       "1  data/102141_2_eng.png  CORONAVIRUS QUARANTINE CORONAVIRUS OUTBREAK BE...   \n",
 | 
						|
       "2    data/102730_eng.png  400 DEATHS GET E-BOOK X AN Corporation ncy Ser...   \n",
 | 
						|
       "\n",
 | 
						|
       "  text_language                                       text_english  \\\n",
 | 
						|
       "0            pt  NEWS URGENT SAMSUNG LIVE Rio de Janeiro NEW CO...   \n",
 | 
						|
       "1            en  CORONAVIRUS QUARANTINE CORONAVIRUS OUTBREAK BE...   \n",
 | 
						|
       "2            en  400 DEATHS GET E-BOOK X AN Corporation ncy Ser...   \n",
 | 
						|
       "\n",
 | 
						|
       "                                        text_summary sentiment  \\\n",
 | 
						|
       "0   NEW COUNTING METHOD RJ City HALL EXCLUDES 1,1...  NEGATIVE   \n",
 | 
						|
       "1        Coronavirus QUARANTINE CORONAVIRUS OUTBREAK  NEGATIVE   \n",
 | 
						|
       "2   A municipal worker sprays disinfectant on his...  NEGATIVE   \n",
 | 
						|
       "\n",
 | 
						|
       "   sentiment_score                                             entity  \\\n",
 | 
						|
       "0             0.99          [Rio de Janeiro, C, ##IT, P, ##NA, ##LTO]   \n",
 | 
						|
       "1             0.98      [CORONAVIRUS, ##AR, ##TI, ##RONAVIR, ##C, Co]   \n",
 | 
						|
       "2             0.99  [AN Corporation ncy Services, Ahmedabad, RE, #...   \n",
 | 
						|
       "\n",
 | 
						|
       "                          entity_type  \n",
 | 
						|
       "0      [LOC, ORG, LOC, LOC, ORG, LOC]  \n",
 | 
						|
       "1  [ORG, MISC, MISC, ORG, MISC, MISC]  \n",
 | 
						|
       "2                [ORG, LOC, PER, ORG]  "
 | 
						|
      ]
 | 
						|
     },
 | 
						|
     "execution_count": 8,
 | 
						|
     "metadata": {},
 | 
						|
     "output_type": "execute_result"
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "df.head(10)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "eedf1e47",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "Write the csv file - here you should provide a file path and file name for the csv file to be written."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 9,
 | 
						|
   "id": "bf6c9ddb",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:30.891561Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:30.891007Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:30.905301Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:30.904714Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [],
 | 
						|
   "source": [
 | 
						|
    "# Write the csv\n",
 | 
						|
    "df.to_csv(\"./data_out.csv\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "4bc8ac0a",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "## Topic analysis\n",
 | 
						|
    "The topic analysis is carried out using [BERTopic](https://maartengr.github.io/BERTopic/index.html) using an embedded model through a [spaCy](https://spacy.io/) pipeline."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "4931941b",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "BERTopic takes a list of strings as input. The more items in the list, the better for the topic modeling. If the below returns an error for `analyse_topic()`, the reason can be that your dataset is too small.\n",
 | 
						|
    "\n",
 | 
						|
    "You can pass which dataframe entry you would like to have analyzed. The default is `text_english`, but you could for example also select `text_summary` or `text_english_correct` setting the keyword `analyze_text` as so:\n",
 | 
						|
    "\n",
 | 
						|
    "`ammico.text.PostprocessText(mydict=mydict, analyze_text=\"text_summary\").analyse_topic()`\n",
 | 
						|
    "\n",
 | 
						|
    "### Option 1: Use the dictionary as obtained from the above analysis."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 10,
 | 
						|
   "id": "a3450a61",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:30.908227Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:30.907705Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:45.015459Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:45.013545Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Reading data from dict.\n",
 | 
						|
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
 | 
						|
      "To disable this warning, you can either:\n",
 | 
						|
      "\t- Avoid using `tokenizers` before the fork if possible\n",
 | 
						|
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Collecting en-core-web-md==3.5.0\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl (42.8 MB)\n",
 | 
						|
      "\u001b[?25l                                              0.0/42.8 MB ? eta -:--:--\r",
 | 
						|
      "\u001b[2K                                              0.1/42.8 MB 2.4 MB/s eta 0:00:18\r",
 | 
						|
      "\u001b[2K     ╸                                        0.8/42.8 MB 11.9 MB/s eta 0:00:04\r",
 | 
						|
      "\u001b[2K     ━━━━                                     4.7/42.8 MB 44.5 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━╸                             11.7/42.8 MB 165.6 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━                       18.9/42.8 MB 198.9 MB/s eta 0:00:01"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━╸                 25.0/42.8 MB 187.4 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━             29.7/42.8 MB 157.5 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸         33.9/42.8 MB 130.5 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸     38.0/42.8 MB 117.2 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━  42.1/42.8 MB 114.7 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 42.8/42.8 MB 114.3 MB/s eta 0:00:01"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 42.8/42.8 MB 114.3 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╸ 42.8/42.8 MB 114.3 MB/s eta 0:00:01\r",
 | 
						|
      "\u001b[2K     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 42.8/42.8 MB 50.0 MB/s eta 0:00:00\n",
 | 
						|
      "\u001b[?25h"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Requirement already satisfied: spacy<3.6.0,>=3.5.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from en-core-web-md==3.5.0) (3.5.3)\n",
 | 
						|
      "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (3.0.12)\n",
 | 
						|
      "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.0.4)\n",
 | 
						|
      "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.0.9)\n",
 | 
						|
      "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.0.7)\n",
 | 
						|
      "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (3.0.8)\n",
 | 
						|
      "Requirement already satisfied: thinc<8.2.0,>=8.1.8 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (8.1.10)\n",
 | 
						|
      "Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.1.2)\n",
 | 
						|
      "Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.4.6)\n",
 | 
						|
      "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.0.8)\n",
 | 
						|
      "Requirement already satisfied: typer<0.8.0,>=0.3.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (0.7.0)\n",
 | 
						|
      "Requirement already satisfied: pathy>=0.10.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (0.10.2)\n",
 | 
						|
      "Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (6.3.0)\n",
 | 
						|
      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (4.65.0)\n",
 | 
						|
      "Requirement already satisfied: numpy>=1.15.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.23.4)\n",
 | 
						|
      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.31.0)\n",
 | 
						|
      "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.10.9)\n",
 | 
						|
      "Requirement already satisfied: jinja2 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (3.1.2)\n",
 | 
						|
      "Requirement already satisfied: setuptools in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (58.1.0)\n",
 | 
						|
      "Requirement already satisfied: packaging>=20.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (23.1)\n",
 | 
						|
      "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (3.3.0)\n",
 | 
						|
      "Requirement already satisfied: typing-extensions>=4.2.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (4.6.3)\n",
 | 
						|
      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (3.1.0)\n",
 | 
						|
      "Requirement already satisfied: idna<4,>=2.5 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.10)\n",
 | 
						|
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (1.26.16)\n",
 | 
						|
      "Requirement already satisfied: certifi>=2017.4.17 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2023.5.7)\n",
 | 
						|
      "Requirement already satisfied: blis<0.8.0,>=0.7.8 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.8->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (0.7.9)\n",
 | 
						|
      "Requirement already satisfied: confection<1.0.0,>=0.0.1 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.8->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (0.0.4)\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Requirement already satisfied: click<9.0.0,>=7.1.1 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from typer<0.8.0,>=0.3.0->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (8.1.3)\n",
 | 
						|
      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages (from jinja2->spacy<3.6.0,>=3.5.0->en-core-web-md==3.5.0) (2.1.3)\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Installing collected packages: en-core-web-md\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Successfully installed en-core-web-md-3.5.0\n",
 | 
						|
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
 | 
						|
      "You can now load the package via spacy.load('en_core_web_md')\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "name": "stderr",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "\n",
 | 
						|
      "[notice] A new release of pip is available: 23.0.1 -> 23.1.2\n",
 | 
						|
      "[notice] To update, run: pip install --upgrade pip\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "ename": "TypeError",
 | 
						|
     "evalue": "Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k.",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:2868\u001b[0m, in \u001b[0;36mBERTopic._reduce_dimensionality\u001b[0;34m(self, embeddings, y, partial_fit)\u001b[0m\n\u001b[1;32m   2867\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2868\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mumap_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2869\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2684\u001b[0m, in \u001b[0;36mUMAP.fit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m   2683\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransform_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 2684\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_, aux_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_embed_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2685\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2686\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2687\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2688\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# JH why raw data?\u001b[39;49;00m\n\u001b[1;32m   2689\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2690\u001b[0m     \u001b[38;5;66;03m# Assign any points that are fully disconnected from our manifold(s) to have embedding\u001b[39;00m\n\u001b[1;32m   2691\u001b[0m     \u001b[38;5;66;03m# coordinates of np.nan.  These will be filtered by our plotting functions automatically.\u001b[39;00m\n\u001b[1;32m   2692\u001b[0m     \u001b[38;5;66;03m# They also prevent users from being deceived a distance query to one of these points.\u001b[39;00m\n\u001b[1;32m   2693\u001b[0m     \u001b[38;5;66;03m# Might be worth moving this into simplicial_set_embedding or _fit_embed_data\u001b[39;00m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2717\u001b[0m, in \u001b[0;36mUMAP._fit_embed_data\u001b[0;34m(self, X, n_epochs, init, random_state)\u001b[0m\n\u001b[1;32m   2714\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A method wrapper for simplicial_set_embedding that can be\u001b[39;00m\n\u001b[1;32m   2715\u001b[0m \u001b[38;5;124;03mreplaced by subclasses.\u001b[39;00m\n\u001b[1;32m   2716\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m-> 2717\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msimplicial_set_embedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2718\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2719\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2720\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2721\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initial_alpha\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2722\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_a\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2723\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_b\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2724\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepulsion_strength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2725\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnegative_sample_rate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2726\u001b[0m \u001b[43m    \u001b[49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2727\u001b[0m \u001b[43m    \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2728\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2729\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2730\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2731\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdensmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2732\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_densmap_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2733\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_dens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2734\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2735\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2736\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_metric\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meuclidean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ml2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2737\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   2738\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2739\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtqdm_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtqdm_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2740\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:1078\u001b[0m, in \u001b[0;36msimplicial_set_embedding\u001b[0;34m(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)\u001b[0m\n\u001b[1;32m   1076\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(init, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m init \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspectral\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m   1077\u001b[0m     \u001b[38;5;66;03m# We add a little noise to avoid local minima for optimization to come\u001b[39;00m\n\u001b[0;32m-> 1078\u001b[0m     initialisation \u001b[38;5;241m=\u001b[39m \u001b[43mspectral_layout\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1079\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1080\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1081\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1082\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1083\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1084\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1085\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1086\u001b[0m     expansion \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10.0\u001b[39m \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mabs(initialisation)\u001b[38;5;241m.\u001b[39mmax()\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/spectral.py:332\u001b[0m, in \u001b[0;36mspectral_layout\u001b[0;34m(data, graph, dim, random_state, metric, metric_kwds)\u001b[0m\n\u001b[1;32m    331\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2000000\u001b[39m:\n\u001b[0;32m--> 332\u001b[0m     eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m \u001b[43mscipy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meigsh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    333\u001b[0m \u001b[43m        \u001b[49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    334\u001b[0m \u001b[43m        \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    335\u001b[0m \u001b[43m        \u001b[49m\u001b[43mwhich\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    336\u001b[0m \u001b[43m        \u001b[49m\u001b[43mncv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_lanczos_vectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    337\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1e-4\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    338\u001b[0m \u001b[43m        \u001b[49m\u001b[43mv0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mones\u001b[49m\u001b[43m(\u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    339\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmaxiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    340\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    341\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1605\u001b[0m, in \u001b[0;36meigsh\u001b[0;34m(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode)\u001b[0m\n\u001b[1;32m   1604\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(A):\n\u001b[0;32m-> 1605\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for sparse A with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1606\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N. Use scipy.linalg.eigh(A.toarray()) or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1607\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m reduce k.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(A, LinearOperator):\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m: Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k.",
 | 
						|
      "\nDuring handling of the above exception, another exception occurred:\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# make a list of all the text_english entries per analysed image from the mydict variable as above\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m topic_model, topic_df, most_frequent_topics \u001b[38;5;241m=\u001b[39m \u001b[43mammico\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPostprocessText\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmydict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmydict\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manalyse_topic\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/text.py:221\u001b[0m, in \u001b[0;36mPostprocessText.analyse_topic\u001b[0;34m(self, return_topics)\u001b[0m\n\u001b[1;32m    219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m    220\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBERTopic excited with an error - maybe your dataset is too small?\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtopics, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtopic_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist_text_english\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    222\u001b[0m \u001b[38;5;66;03m# return the topic list\u001b[39;00m\n\u001b[1;32m    223\u001b[0m topic_df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtopic_model\u001b[38;5;241m.\u001b[39mget_topic_info()\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:356\u001b[0m, in \u001b[0;36mBERTopic.fit_transform\u001b[0;34m(self, documents, embeddings, y)\u001b[0m\n\u001b[1;32m    354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseed_topic_list \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_model \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    355\u001b[0m     y, embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_guided_topic_modeling(embeddings)\n\u001b[0;32m--> 356\u001b[0m umap_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_dimensionality\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    358\u001b[0m \u001b[38;5;66;03m# Cluster reduced embeddings\u001b[39;00m\n\u001b[1;32m    359\u001b[0m documents, probabilities \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cluster_embeddings(umap_embeddings, documents, y\u001b[38;5;241m=\u001b[39my)\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:2872\u001b[0m, in \u001b[0;36mBERTopic._reduce_dimensionality\u001b[0;34m(self, embeddings, y, partial_fit)\u001b[0m\n\u001b[1;32m   2869\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m   2870\u001b[0m         logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe dimensionality reduction algorithm did not contain the `y` parameter and\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2871\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m therefore the `y` parameter was not used\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2872\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mumap_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2874\u001b[0m umap_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mumap_model\u001b[38;5;241m.\u001b[39mtransform(embeddings)\n\u001b[1;32m   2875\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReduced dimensionality\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2684\u001b[0m, in \u001b[0;36mUMAP.fit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m   2681\u001b[0m     \u001b[38;5;28mprint\u001b[39m(ts(), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConstruct embedding\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   2683\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransform_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 2684\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_, aux_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_embed_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2685\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2686\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2687\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2688\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# JH why raw data?\u001b[39;49;00m\n\u001b[1;32m   2689\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2690\u001b[0m     \u001b[38;5;66;03m# Assign any points that are fully disconnected from our manifold(s) to have embedding\u001b[39;00m\n\u001b[1;32m   2691\u001b[0m     \u001b[38;5;66;03m# coordinates of np.nan.  These will be filtered by our plotting functions automatically.\u001b[39;00m\n\u001b[1;32m   2692\u001b[0m     \u001b[38;5;66;03m# They also prevent users from being deceived a distance query to one of these points.\u001b[39;00m\n\u001b[1;32m   2693\u001b[0m     \u001b[38;5;66;03m# Might be worth moving this into simplicial_set_embedding or _fit_embed_data\u001b[39;00m\n\u001b[1;32m   2694\u001b[0m     disconnected_vertices \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_\u001b[38;5;241m.\u001b[39msum(axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m))\u001b[38;5;241m.\u001b[39mflatten() \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2717\u001b[0m, in \u001b[0;36mUMAP._fit_embed_data\u001b[0;34m(self, X, n_epochs, init, random_state)\u001b[0m\n\u001b[1;32m   2713\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_fit_embed_data\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, n_epochs, init, random_state):\n\u001b[1;32m   2714\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"A method wrapper for simplicial_set_embedding that can be\u001b[39;00m\n\u001b[1;32m   2715\u001b[0m \u001b[38;5;124;03m    replaced by subclasses.\u001b[39;00m\n\u001b[1;32m   2716\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 2717\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msimplicial_set_embedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2718\u001b[0m \u001b[43m        \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2719\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2720\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2721\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initial_alpha\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2722\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_a\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2723\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_b\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2724\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepulsion_strength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2725\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnegative_sample_rate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2726\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2727\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2728\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2729\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2730\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2731\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdensmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2732\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_densmap_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2733\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_dens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2734\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2735\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2736\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_metric\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meuclidean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ml2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2737\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   2738\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2739\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtqdm_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtqdm_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2740\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:1078\u001b[0m, in \u001b[0;36msimplicial_set_embedding\u001b[0;34m(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)\u001b[0m\n\u001b[1;32m   1073\u001b[0m     embedding \u001b[38;5;241m=\u001b[39m random_state\u001b[38;5;241m.\u001b[39muniform(\n\u001b[1;32m   1074\u001b[0m         low\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m10.0\u001b[39m, high\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10.0\u001b[39m, size\u001b[38;5;241m=\u001b[39m(graph\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], n_components)\n\u001b[1;32m   1075\u001b[0m     )\u001b[38;5;241m.\u001b[39mastype(np\u001b[38;5;241m.\u001b[39mfloat32)\n\u001b[1;32m   1076\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(init, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m init \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspectral\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m   1077\u001b[0m     \u001b[38;5;66;03m# We add a little noise to avoid local minima for optimization to come\u001b[39;00m\n\u001b[0;32m-> 1078\u001b[0m     initialisation \u001b[38;5;241m=\u001b[39m \u001b[43mspectral_layout\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1079\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1080\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1081\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1082\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1083\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1084\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1085\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1086\u001b[0m     expansion \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10.0\u001b[39m \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mabs(initialisation)\u001b[38;5;241m.\u001b[39mmax()\n\u001b[1;32m   1087\u001b[0m     embedding \u001b[38;5;241m=\u001b[39m (initialisation \u001b[38;5;241m*\u001b[39m expansion)\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m   1088\u001b[0m         np\u001b[38;5;241m.\u001b[39mfloat32\n\u001b[1;32m   1089\u001b[0m     ) \u001b[38;5;241m+\u001b[39m random_state\u001b[38;5;241m.\u001b[39mnormal(\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1092\u001b[0m         np\u001b[38;5;241m.\u001b[39mfloat32\n\u001b[1;32m   1093\u001b[0m     )\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/spectral.py:332\u001b[0m, in \u001b[0;36mspectral_layout\u001b[0;34m(data, graph, dim, random_state, metric, metric_kwds)\u001b[0m\n\u001b[1;32m    330\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    331\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2000000\u001b[39m:\n\u001b[0;32m--> 332\u001b[0m         eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m \u001b[43mscipy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meigsh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    333\u001b[0m \u001b[43m            \u001b[49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    334\u001b[0m \u001b[43m            \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    335\u001b[0m \u001b[43m            \u001b[49m\u001b[43mwhich\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    336\u001b[0m \u001b[43m            \u001b[49m\u001b[43mncv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_lanczos_vectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    337\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1e-4\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    338\u001b[0m \u001b[43m            \u001b[49m\u001b[43mv0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mones\u001b[49m\u001b[43m(\u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    339\u001b[0m \u001b[43m            \u001b[49m\u001b[43mmaxiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    340\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    341\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    342\u001b[0m         eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m scipy\u001b[38;5;241m.\u001b[39msparse\u001b[38;5;241m.\u001b[39mlinalg\u001b[38;5;241m.\u001b[39mlobpcg(\n\u001b[1;32m    343\u001b[0m             L, random_state\u001b[38;5;241m.\u001b[39mnormal(size\u001b[38;5;241m=\u001b[39m(L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], k)), largest\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, tol\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1e-8\u001b[39m\n\u001b[1;32m    344\u001b[0m         )\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1605\u001b[0m, in \u001b[0;36meigsh\u001b[0;34m(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode)\u001b[0m\n\u001b[1;32m   1600\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N for N * N square matrix. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1601\u001b[0m               \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAttempting to use scipy.linalg.eigh instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m   1602\u001b[0m               \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m)\n\u001b[1;32m   1604\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(A):\n\u001b[0;32m-> 1605\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for sparse A with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1606\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N. Use scipy.linalg.eigh(A.toarray()) or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1607\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m reduce k.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(A, LinearOperator):\n\u001b[1;32m   1609\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for LinearOperator \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1610\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA with k >= N.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m: Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k."
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "# make a list of all the text_english entries per analysed image from the mydict variable as above\n",
 | 
						|
    "topic_model, topic_df, most_frequent_topics = ammico.text.PostprocessText(\n",
 | 
						|
    "    mydict=mydict\n",
 | 
						|
    ").analyse_topic()"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "95667342",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "### Option 2: Read in a csv\n",
 | 
						|
    "Not to analyse too many images on google Cloud Vision, use the csv output to obtain the text (when rerunning already analysed images)."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 11,
 | 
						|
   "id": "5530e436",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:45.019909Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:45.019678Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:46.513284Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:46.512283Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "name": "stdout",
 | 
						|
     "output_type": "stream",
 | 
						|
     "text": [
 | 
						|
      "Reading data from df.\n"
 | 
						|
     ]
 | 
						|
    },
 | 
						|
    {
 | 
						|
     "ename": "TypeError",
 | 
						|
     "evalue": "Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k.",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:2868\u001b[0m, in \u001b[0;36mBERTopic._reduce_dimensionality\u001b[0;34m(self, embeddings, y, partial_fit)\u001b[0m\n\u001b[1;32m   2867\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2868\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mumap_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2869\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2684\u001b[0m, in \u001b[0;36mUMAP.fit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m   2683\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransform_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 2684\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_, aux_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_embed_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2685\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2686\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2687\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2688\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# JH why raw data?\u001b[39;49;00m\n\u001b[1;32m   2689\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2690\u001b[0m     \u001b[38;5;66;03m# Assign any points that are fully disconnected from our manifold(s) to have embedding\u001b[39;00m\n\u001b[1;32m   2691\u001b[0m     \u001b[38;5;66;03m# coordinates of np.nan.  These will be filtered by our plotting functions automatically.\u001b[39;00m\n\u001b[1;32m   2692\u001b[0m     \u001b[38;5;66;03m# They also prevent users from being deceived a distance query to one of these points.\u001b[39;00m\n\u001b[1;32m   2693\u001b[0m     \u001b[38;5;66;03m# Might be worth moving this into simplicial_set_embedding or _fit_embed_data\u001b[39;00m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2717\u001b[0m, in \u001b[0;36mUMAP._fit_embed_data\u001b[0;34m(self, X, n_epochs, init, random_state)\u001b[0m\n\u001b[1;32m   2714\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A method wrapper for simplicial_set_embedding that can be\u001b[39;00m\n\u001b[1;32m   2715\u001b[0m \u001b[38;5;124;03mreplaced by subclasses.\u001b[39;00m\n\u001b[1;32m   2716\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m-> 2717\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msimplicial_set_embedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2718\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2719\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2720\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2721\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initial_alpha\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2722\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_a\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2723\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_b\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2724\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepulsion_strength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2725\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnegative_sample_rate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2726\u001b[0m \u001b[43m    \u001b[49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2727\u001b[0m \u001b[43m    \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2728\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2729\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2730\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2731\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdensmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2732\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_densmap_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2733\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_dens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2734\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2735\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2736\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_metric\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meuclidean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ml2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2737\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   2738\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2739\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtqdm_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtqdm_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2740\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:1078\u001b[0m, in \u001b[0;36msimplicial_set_embedding\u001b[0;34m(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)\u001b[0m\n\u001b[1;32m   1076\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(init, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m init \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspectral\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m   1077\u001b[0m     \u001b[38;5;66;03m# We add a little noise to avoid local minima for optimization to come\u001b[39;00m\n\u001b[0;32m-> 1078\u001b[0m     initialisation \u001b[38;5;241m=\u001b[39m \u001b[43mspectral_layout\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1079\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1080\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1081\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1082\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1083\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1084\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1085\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1086\u001b[0m     expansion \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10.0\u001b[39m \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mabs(initialisation)\u001b[38;5;241m.\u001b[39mmax()\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/spectral.py:332\u001b[0m, in \u001b[0;36mspectral_layout\u001b[0;34m(data, graph, dim, random_state, metric, metric_kwds)\u001b[0m\n\u001b[1;32m    331\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2000000\u001b[39m:\n\u001b[0;32m--> 332\u001b[0m     eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m \u001b[43mscipy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meigsh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    333\u001b[0m \u001b[43m        \u001b[49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    334\u001b[0m \u001b[43m        \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    335\u001b[0m \u001b[43m        \u001b[49m\u001b[43mwhich\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    336\u001b[0m \u001b[43m        \u001b[49m\u001b[43mncv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_lanczos_vectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    337\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1e-4\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    338\u001b[0m \u001b[43m        \u001b[49m\u001b[43mv0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mones\u001b[49m\u001b[43m(\u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    339\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmaxiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    340\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    341\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1605\u001b[0m, in \u001b[0;36meigsh\u001b[0;34m(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode)\u001b[0m\n\u001b[1;32m   1604\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(A):\n\u001b[0;32m-> 1605\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for sparse A with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1606\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N. Use scipy.linalg.eigh(A.toarray()) or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1607\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m reduce k.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(A, LinearOperator):\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m: Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k.",
 | 
						|
      "\nDuring handling of the above exception, another exception occurred:\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m input_file_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_out.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m topic_model, topic_df, most_frequent_topics \u001b[38;5;241m=\u001b[39m \u001b[43mammico\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPostprocessText\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[43muse_csv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcsv_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_file_path\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43manalyse_topic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreturn_topics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/text.py:221\u001b[0m, in \u001b[0;36mPostprocessText.analyse_topic\u001b[0;34m(self, return_topics)\u001b[0m\n\u001b[1;32m    219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m    220\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBERTopic excited with an error - maybe your dataset is too small?\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtopics, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprobs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtopic_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist_text_english\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    222\u001b[0m \u001b[38;5;66;03m# return the topic list\u001b[39;00m\n\u001b[1;32m    223\u001b[0m topic_df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtopic_model\u001b[38;5;241m.\u001b[39mget_topic_info()\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:356\u001b[0m, in \u001b[0;36mBERTopic.fit_transform\u001b[0;34m(self, documents, embeddings, y)\u001b[0m\n\u001b[1;32m    354\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseed_topic_list \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_model \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    355\u001b[0m     y, embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_guided_topic_modeling(embeddings)\n\u001b[0;32m--> 356\u001b[0m umap_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_reduce_dimensionality\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    358\u001b[0m \u001b[38;5;66;03m# Cluster reduced embeddings\u001b[39;00m\n\u001b[1;32m    359\u001b[0m documents, probabilities \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cluster_embeddings(umap_embeddings, documents, y\u001b[38;5;241m=\u001b[39my)\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/bertopic/_bertopic.py:2872\u001b[0m, in \u001b[0;36mBERTopic._reduce_dimensionality\u001b[0;34m(self, embeddings, y, partial_fit)\u001b[0m\n\u001b[1;32m   2869\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m   2870\u001b[0m         logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe dimensionality reduction algorithm did not contain the `y` parameter and\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   2871\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m therefore the `y` parameter was not used\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2872\u001b[0m         \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mumap_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2874\u001b[0m umap_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mumap_model\u001b[38;5;241m.\u001b[39mtransform(embeddings)\n\u001b[1;32m   2875\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReduced dimensionality\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2684\u001b[0m, in \u001b[0;36mUMAP.fit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m   2681\u001b[0m     \u001b[38;5;28mprint\u001b[39m(ts(), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConstruct embedding\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   2683\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransform_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 2684\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding_, aux_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_embed_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2685\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raw_data\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2686\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2687\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2688\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# JH why raw data?\u001b[39;49;00m\n\u001b[1;32m   2689\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2690\u001b[0m     \u001b[38;5;66;03m# Assign any points that are fully disconnected from our manifold(s) to have embedding\u001b[39;00m\n\u001b[1;32m   2691\u001b[0m     \u001b[38;5;66;03m# coordinates of np.nan.  These will be filtered by our plotting functions automatically.\u001b[39;00m\n\u001b[1;32m   2692\u001b[0m     \u001b[38;5;66;03m# They also prevent users from being deceived a distance query to one of these points.\u001b[39;00m\n\u001b[1;32m   2693\u001b[0m     \u001b[38;5;66;03m# Might be worth moving this into simplicial_set_embedding or _fit_embed_data\u001b[39;00m\n\u001b[1;32m   2694\u001b[0m     disconnected_vertices \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_\u001b[38;5;241m.\u001b[39msum(axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m))\u001b[38;5;241m.\u001b[39mflatten() \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:2717\u001b[0m, in \u001b[0;36mUMAP._fit_embed_data\u001b[0;34m(self, X, n_epochs, init, random_state)\u001b[0m\n\u001b[1;32m   2713\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_fit_embed_data\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, n_epochs, init, random_state):\n\u001b[1;32m   2714\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"A method wrapper for simplicial_set_embedding that can be\u001b[39;00m\n\u001b[1;32m   2715\u001b[0m \u001b[38;5;124;03m    replaced by subclasses.\u001b[39;00m\n\u001b[1;32m   2716\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 2717\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msimplicial_set_embedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2718\u001b[0m \u001b[43m        \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2719\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2720\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2721\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initial_alpha\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2722\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_a\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2723\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_b\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2724\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepulsion_strength\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2725\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnegative_sample_rate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2726\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_epochs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2727\u001b[0m \u001b[43m        \u001b[49m\u001b[43minit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2728\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2729\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2730\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2731\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdensmap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2732\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_densmap_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2733\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_dens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2734\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_distance_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2735\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_output_metric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2736\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_metric\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meuclidean\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ml2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2737\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   2738\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2739\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtqdm_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtqdm_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2740\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/umap_.py:1078\u001b[0m, in \u001b[0;36msimplicial_set_embedding\u001b[0;34m(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)\u001b[0m\n\u001b[1;32m   1073\u001b[0m     embedding \u001b[38;5;241m=\u001b[39m random_state\u001b[38;5;241m.\u001b[39muniform(\n\u001b[1;32m   1074\u001b[0m         low\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m10.0\u001b[39m, high\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10.0\u001b[39m, size\u001b[38;5;241m=\u001b[39m(graph\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], n_components)\n\u001b[1;32m   1075\u001b[0m     )\u001b[38;5;241m.\u001b[39mastype(np\u001b[38;5;241m.\u001b[39mfloat32)\n\u001b[1;32m   1076\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(init, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m init \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspectral\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m   1077\u001b[0m     \u001b[38;5;66;03m# We add a little noise to avoid local minima for optimization to come\u001b[39;00m\n\u001b[0;32m-> 1078\u001b[0m     initialisation \u001b[38;5;241m=\u001b[39m \u001b[43mspectral_layout\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1079\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1080\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1081\u001b[0m \u001b[43m        \u001b[49m\u001b[43mn_components\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1082\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1083\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1084\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetric_kwds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_kwds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1085\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1086\u001b[0m     expansion \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10.0\u001b[39m \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mabs(initialisation)\u001b[38;5;241m.\u001b[39mmax()\n\u001b[1;32m   1087\u001b[0m     embedding \u001b[38;5;241m=\u001b[39m (initialisation \u001b[38;5;241m*\u001b[39m expansion)\u001b[38;5;241m.\u001b[39mastype(\n\u001b[1;32m   1088\u001b[0m         np\u001b[38;5;241m.\u001b[39mfloat32\n\u001b[1;32m   1089\u001b[0m     ) \u001b[38;5;241m+\u001b[39m random_state\u001b[38;5;241m.\u001b[39mnormal(\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1092\u001b[0m         np\u001b[38;5;241m.\u001b[39mfloat32\n\u001b[1;32m   1093\u001b[0m     )\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/umap/spectral.py:332\u001b[0m, in \u001b[0;36mspectral_layout\u001b[0;34m(data, graph, dim, random_state, metric, metric_kwds)\u001b[0m\n\u001b[1;32m    330\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    331\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2000000\u001b[39m:\n\u001b[0;32m--> 332\u001b[0m         eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m \u001b[43mscipy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meigsh\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    333\u001b[0m \u001b[43m            \u001b[49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    334\u001b[0m \u001b[43m            \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    335\u001b[0m \u001b[43m            \u001b[49m\u001b[43mwhich\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    336\u001b[0m \u001b[43m            \u001b[49m\u001b[43mncv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_lanczos_vectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    337\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1e-4\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    338\u001b[0m \u001b[43m            \u001b[49m\u001b[43mv0\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mones\u001b[49m\u001b[43m(\u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    339\u001b[0m \u001b[43m            \u001b[49m\u001b[43mmaxiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    340\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    341\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    342\u001b[0m         eigenvalues, eigenvectors \u001b[38;5;241m=\u001b[39m scipy\u001b[38;5;241m.\u001b[39msparse\u001b[38;5;241m.\u001b[39mlinalg\u001b[38;5;241m.\u001b[39mlobpcg(\n\u001b[1;32m    343\u001b[0m             L, random_state\u001b[38;5;241m.\u001b[39mnormal(size\u001b[38;5;241m=\u001b[39m(L\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], k)), largest\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, tol\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1e-8\u001b[39m\n\u001b[1;32m    344\u001b[0m         )\n",
 | 
						|
      "File \u001b[0;32m/opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/arpack/arpack.py:1605\u001b[0m, in \u001b[0;36meigsh\u001b[0;34m(A, k, M, sigma, which, v0, ncv, maxiter, tol, return_eigenvectors, Minv, OPinv, mode)\u001b[0m\n\u001b[1;32m   1600\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N for N * N square matrix. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1601\u001b[0m               \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAttempting to use scipy.linalg.eigh instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m   1602\u001b[0m               \u001b[38;5;167;01mRuntimeWarning\u001b[39;00m)\n\u001b[1;32m   1604\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m issparse(A):\n\u001b[0;32m-> 1605\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for sparse A with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1606\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mk >= N. Use scipy.linalg.eigh(A.toarray()) or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1607\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m reduce k.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(A, LinearOperator):\n\u001b[1;32m   1609\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot use scipy.linalg.eigh for LinearOperator \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1610\u001b[0m                     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA with k >= N.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | 
						|
      "\u001b[0;31mTypeError\u001b[0m: Cannot use scipy.linalg.eigh for sparse A with k >= N. Use scipy.linalg.eigh(A.toarray()) or reduce k."
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "input_file_path = \"data_out.csv\"\n",
 | 
						|
    "topic_model, topic_df, most_frequent_topics = ammico.text.PostprocessText(\n",
 | 
						|
    "    use_csv=True, csv_path=input_file_path\n",
 | 
						|
    ").analyse_topic(return_topics=10)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "0b6ef6d7",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "### Access frequent topics\n",
 | 
						|
    "A topic of `-1` stands for an outlier and should be ignored. Topic count is the number of occurence of that topic. The output is structured from most frequent to least frequent topic."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 12,
 | 
						|
   "id": "43288cda-61bb-4ff1-a209-dcfcc4916b1f",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:46.516495Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:46.516261Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:46.550184Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:46.549551Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "ename": "NameError",
 | 
						|
     "evalue": "name 'topic_df' is not defined",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mtopic_df\u001b[49m)\n",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m: name 'topic_df' is not defined"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "print(topic_df)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "b3316770",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "### Get information for specific topic\n",
 | 
						|
    "The most frequent topics can be accessed through `most_frequent_topics` with the most occuring topics first in the list."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 13,
 | 
						|
   "id": "db14fe03",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:46.554159Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:46.552888Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:46.584494Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:46.583894Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "ename": "NameError",
 | 
						|
     "evalue": "name 'most_frequent_topics' is not defined",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m topic \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmost_frequent_topics\u001b[49m:\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTopic:\u001b[39m\u001b[38;5;124m\"\u001b[39m, topic)\n",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m: name 'most_frequent_topics' is not defined"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "for topic in most_frequent_topics:\n",
 | 
						|
    "    print(\"Topic:\", topic)"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "d10f701e",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "### Topic visualization\n",
 | 
						|
    "The topics can also be visualized. Careful: This only works if there is sufficient data (quantity and quality)."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 14,
 | 
						|
   "id": "2331afe6",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:46.587622Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:46.587263Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:46.616640Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:46.616059Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "ename": "NameError",
 | 
						|
     "evalue": "name 'topic_model' is not defined",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[14], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtopic_model\u001b[49m\u001b[38;5;241m.\u001b[39mvisualize_topics()\n",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m: name 'topic_model' is not defined"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "topic_model.visualize_topics()"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "attachments": {},
 | 
						|
   "cell_type": "markdown",
 | 
						|
   "id": "f4eaf353",
 | 
						|
   "metadata": {},
 | 
						|
   "source": [
 | 
						|
    "### Save the model\n",
 | 
						|
    "The model can be saved for future use."
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": 15,
 | 
						|
   "id": "e5e8377c",
 | 
						|
   "metadata": {
 | 
						|
    "execution": {
 | 
						|
     "iopub.execute_input": "2023-06-23T12:24:46.619565Z",
 | 
						|
     "iopub.status.busy": "2023-06-23T12:24:46.619271Z",
 | 
						|
     "iopub.status.idle": "2023-06-23T12:24:46.648645Z",
 | 
						|
     "shell.execute_reply": "2023-06-23T12:24:46.648043Z"
 | 
						|
    }
 | 
						|
   },
 | 
						|
   "outputs": [
 | 
						|
    {
 | 
						|
     "ename": "NameError",
 | 
						|
     "evalue": "name 'topic_model' is not defined",
 | 
						|
     "output_type": "error",
 | 
						|
     "traceback": [
 | 
						|
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | 
						|
      "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtopic_model\u001b[49m\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmisinfo_posts\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | 
						|
      "\u001b[0;31mNameError\u001b[0m: name 'topic_model' is not defined"
 | 
						|
     ]
 | 
						|
    }
 | 
						|
   ],
 | 
						|
   "source": [
 | 
						|
    "topic_model.save(\"misinfo_posts\")"
 | 
						|
   ]
 | 
						|
  },
 | 
						|
  {
 | 
						|
   "cell_type": "code",
 | 
						|
   "execution_count": null,
 | 
						|
   "id": "7c94edb9",
 | 
						|
   "metadata": {},
 | 
						|
   "outputs": [],
 | 
						|
   "source": []
 | 
						|
  }
 | 
						|
 ],
 | 
						|
 "metadata": {
 | 
						|
  "kernelspec": {
 | 
						|
   "display_name": "Python 3 (ipykernel)",
 | 
						|
   "language": "python",
 | 
						|
   "name": "python3"
 | 
						|
  },
 | 
						|
  "language_info": {
 | 
						|
   "codemirror_mode": {
 | 
						|
    "name": "ipython",
 | 
						|
    "version": 3
 | 
						|
   },
 | 
						|
   "file_extension": ".py",
 | 
						|
   "mimetype": "text/x-python",
 | 
						|
   "name": "python",
 | 
						|
   "nbconvert_exporter": "python",
 | 
						|
   "pygments_lexer": "ipython3",
 | 
						|
   "version": "3.9.17"
 | 
						|
  },
 | 
						|
  "vscode": {
 | 
						|
   "interpreter": {
 | 
						|
    "hash": "da98320027a74839c7141b42ef24e2d47d628ba1f51115c13da5d8b45a372ec2"
 | 
						|
   }
 | 
						|
  }
 | 
						|
 },
 | 
						|
 "nbformat": 4,
 | 
						|
 "nbformat_minor": 5
 | 
						|
}
 |