зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-30 21:46:04 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			326 строки
		
	
	
		
			11 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			326 строки
		
	
	
		
			11 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# Color analysis of pictures\n",
 | |
|     "\n",
 | |
|     "\n",
 | |
|     "\n",
 | |
|     "This notebook shows primary color analysis of a color image using the K-Means algorithm.\n",
 | |
|     "The output is the N primary colors and their corresponding percentages.\n",
 | |
|     "\n",
 | |
|     "The first cell is only run on Google Colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
 | |
|     "\n",
 | |
|     "After that, we can import `ammico` and read in the files given a folder path."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 1,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:06:51.157185Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:06:51.156533Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:06:51.165679Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:06:51.165046Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# if running on google colab\n",
 | |
|     "# flake8-noqa-cell\n",
 | |
|     "import os\n",
 | |
|     "\n",
 | |
|     "if \"google.colab\" in str(get_ipython()):\n",
 | |
|     "    # update python version\n",
 | |
|     "    # install setuptools\n",
 | |
|     "    # %pip install setuptools==61 -qqq\n",
 | |
|     "    # install ammico\n",
 | |
|     "    %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
 | |
|     "    # mount google drive for data and API key\n",
 | |
|     "    from google.colab import drive\n",
 | |
|     "\n",
 | |
|     "    drive.mount(\"/content/drive\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:06:51.168990Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:06:51.168555Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:02.844083Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:02.843367Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "import ammico\n",
 | |
|     "from ammico import utils as mutils\n",
 | |
|     "from ammico import display as mdisplay\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "We select a subset of image files to try the color analysis on (see the `limit` keyword). The `find_files` function finds image files within a given directory:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:02.848332Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:02.847598Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:02.851965Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:02.851262Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "# Here you need to provide the path to your google drive folder\n",
 | |
|     "# or local folder containing the images\n",
 | |
|     "images = mutils.find_files(\n",
 | |
|     "    path=\"/content/drive/MyDrive/misinformation-data/\",\n",
 | |
|     "    limit=10,\n",
 | |
|     ")\n"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 4,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:02.855341Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:02.855106Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:02.858208Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:02.857509Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "mydict = mutils.initialize_dict(images)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a CSV file in the step below.\n",
 | |
|     "Here, we display the color detection results provided by the `colorgram` and `colour` libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 5,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:02.861441Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:02.861207Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:02.894620Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:02.893951Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "name": "stdout",
 | |
|      "output_type": "stream",
 | |
|      "text": [
 | |
|       "Dash is running on http://127.0.0.1:8057/\n",
 | |
|       "\n"
 | |
|      ]
 | |
|     },
 | |
|     {
 | |
|      "data": {
 | |
|       "text/html": [
 | |
|        "\n",
 | |
|        "        <iframe\n",
 | |
|        "            width=\"100%\"\n",
 | |
|        "            height=\"650\"\n",
 | |
|        "            src=\"http://127.0.0.1:8057/\"\n",
 | |
|        "            frameborder=\"0\"\n",
 | |
|        "            allowfullscreen\n",
 | |
|        "            \n",
 | |
|        "        ></iframe>\n",
 | |
|        "        "
 | |
|       ],
 | |
|       "text/plain": [
 | |
|        "<IPython.lib.display.IFrame at 0x7f905c50efd0>"
 | |
|       ]
 | |
|      },
 | |
|      "metadata": {},
 | |
|      "output_type": "display_data"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"colors\")\n",
 | |
|     "analysis_explorer.run_server(port = 8057)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Instead of inspecting each of the images, you can also directly carry out the analysis and export the results to a CSV file. This may take a while depending on how many images you have loaded."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 6,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:03.390903Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:03.390288Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:03.394359Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:03.393589Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "for key in mydict.keys():\n",
 | |
|     "    mydict[key] = ammico.colors.ColorDetector(mydict[key]).analyse_image()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "These steps are required to convert the dictionary of dictionaries into a dictionary of lists that can be converted into a pandas dataframe and exported to a CSV file."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 7,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:03.398008Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:03.397345Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:04.583139Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:04.582297Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "IndexError",
 | |
|      "evalue": "list index out of range",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m outdict \u001b[38;5;241m=\u001b[39m \u001b[43mmutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend_data_to_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmydict\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m df \u001b[38;5;241m=\u001b[39m mutils\u001b[38;5;241m.\u001b[39mdump_df(outdict)\n",
 | |
|       "File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/utils.py:92\u001b[0m, in \u001b[0;36mappend_data_to_dict\u001b[0;34m(mydict)\u001b[0m\n\u001b[1;32m     89\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Append entries from nested dictionaries to keys in a global dict.\"\"\"\u001b[39;00m\n\u001b[1;32m     91\u001b[0m \u001b[38;5;66;03m# first initialize empty list for each key that is present\u001b[39;00m\n\u001b[0;32m---> 92\u001b[0m outdict \u001b[38;5;241m=\u001b[39m {key: [] \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmydict\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mkeys()}\n\u001b[1;32m     93\u001b[0m \u001b[38;5;66;03m# now append the values to each key in a list\u001b[39;00m\n\u001b[1;32m     94\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m subdict \u001b[38;5;129;01min\u001b[39;00m mydict\u001b[38;5;241m.\u001b[39mvalues():\n",
 | |
|       "\u001b[0;31mIndexError\u001b[0m: list index out of range"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "outdict = mutils.append_data_to_dict(mydict)\n",
 | |
|     "df = mutils.dump_df(outdict)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Check the dataframe:"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 8,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:04.586954Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:04.586692Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:04.625945Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:04.625122Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'df' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m)\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "df.head(10)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "Write the CSV file. Here you should provide a file path and file name for the CSV file to be written."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 9,
 | |
|    "metadata": {
 | |
|     "execution": {
 | |
|      "iopub.execute_input": "2023-06-22T12:07:04.629298Z",
 | |
|      "iopub.status.busy": "2023-06-22T12:07:04.628858Z",
 | |
|      "iopub.status.idle": "2023-06-22T12:07:04.668480Z",
 | |
|      "shell.execute_reply": "2023-06-22T12:07:04.667619Z"
 | |
|     }
 | |
|    },
 | |
|    "outputs": [
 | |
|     {
 | |
|      "ename": "NameError",
 | |
|      "evalue": "name 'df' is not defined",
 | |
|      "output_type": "error",
 | |
|      "traceback": [
 | |
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 | |
|       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
 | |
|       "Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/content/drive/MyDrive/misinformation-data/data_out.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
 | |
|       "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
 | |
|      ]
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
 | |
|    ]
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.9.17"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 2
 | |
| }
 | 
