зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-30 21:46:04 +02:00
344 строки
14 KiB
Plaintext
344 строки
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Color analysis of pictures\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"This notebook shows the primary color analysis of color images using the K-Means algorithm.\n",
|
|
"The output is the N primary colors and their corresponding percentages.\n",
|
|
"\n",
|
|
"The first cell is only run on Google Colab and installs the [ammico](https://github.com/ssciwr/AMMICO) package.\n",
|
|
"\n",
|
|
"After that, we can import `ammico` and read in the files given a folder path."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:23.992173Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:23.991486Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:24.000880Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:24.000215Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# if running on google colab\n",
|
|
"# flake8-noqa-cell\n",
|
|
"import os\n",
|
|
"\n",
|
|
"if \"google.colab\" in str(get_ipython()):\n",
|
|
" # update python version\n",
|
|
" # install setuptools\n",
|
|
" # %pip install setuptools==61 -qqq\n",
|
|
" # install ammico\n",
|
|
" %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
|
|
" # mount google drive for data and API key\n",
|
|
" from google.colab import drive\n",
|
|
"\n",
|
|
" drive.mount(\"/content/drive\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:24.003980Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:24.003538Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:35.647973Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:35.647255Z"
|
|
}
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import ammico\n",
|
|
"from ammico import utils as mutils\n",
|
|
"from ammico import display as mdisplay\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We select a subset of image files to try the color analysis on (see the `limit` keyword). The `find_files` function finds image files within a given directory:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:35.652621Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:35.651752Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:36.880889Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:36.880043Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "FileNotFoundError",
|
|
"evalue": "No files found in /content/drive/MyDrive/misinformation-data/ with pattern '['png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'tiff']'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Here you need to provide the path to your google drive folder\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# or local folder containing the images\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m images \u001b[38;5;241m=\u001b[39m \u001b[43mmutils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/content/drive/MyDrive/misinformation-data/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mlimit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/work/AMMICO/AMMICO/ammico/utils.py:134\u001b[0m, in \u001b[0;36mfind_files\u001b[0;34m(path, pattern, recursive, limit, random_seed)\u001b[0m\n\u001b[1;32m 131\u001b[0m results\u001b[38;5;241m.\u001b[39mextend(_match_pattern(path, p, recursive\u001b[38;5;241m=\u001b[39mrecursive))\n\u001b[1;32m 133\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(results) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 134\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo files found in \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with pattern \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpattern\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m random_seed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 137\u001b[0m random\u001b[38;5;241m.\u001b[39mseed(random_seed)\n",
|
|
"\u001b[0;31mFileNotFoundError\u001b[0m: No files found in /content/drive/MyDrive/misinformation-data/ with pattern '['png', 'jpg', 'jpeg', 'gif', 'webp', 'avif', 'tiff']'"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Here you need to provide the path to your google drive folder\n",
|
|
"# or local folder containing the images\n",
|
|
"images = mutils.find_files(\n",
|
|
" path=\"/content/drive/MyDrive/misinformation-data/\",\n",
|
|
" limit=10,\n",
|
|
")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We need to initialize the main dictionary that contains all information for the images and is updated through each subsequent analysis:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:36.928605Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:36.927981Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:36.964411Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:36.963639Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'images' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mydict \u001b[38;5;241m=\u001b[39m mutils\u001b[38;5;241m.\u001b[39minitialize_dict(\u001b[43mimages\u001b[49m)\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'images' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"mydict = mutils.initialize_dict(images)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing, you can skip this and directly export a csv file in the step below.\n",
|
|
"Here, we display the color detection results provided by the `colorgram` and `colour` libraries. Click on the tabs to see the results in the right sidebar. You may need to increment the `port` number if you are already running several notebook instances on the same server."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:36.967620Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:36.967379Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:36.997170Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:36.996423Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'mydict' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m analysis_explorer \u001b[38;5;241m=\u001b[39m mdisplay\u001b[38;5;241m.\u001b[39mAnalysisExplorer(\u001b[43mmydict\u001b[49m, identify\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcolors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2\u001b[0m analysis_explorer\u001b[38;5;241m.\u001b[39mrun_server(port \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m8057\u001b[39m)\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'mydict' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"analysis_explorer = mdisplay.AnalysisExplorer(mydict, identify=\"colors\")\n",
|
|
"analysis_explorer.run_server(port = 8057)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Instead of inspecting each of the images, you can also directly carry out the analysis and export the result into a csv. This may take a while depending on how many images you have loaded."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:37.000244Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:37.000010Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:37.029903Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:37.029098Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'mydict' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmydict\u001b[49m\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 2\u001b[0m mydict[key] \u001b[38;5;241m=\u001b[39m ammico\u001b[38;5;241m.\u001b[39mcolors\u001b[38;5;241m.\u001b[39mColorDetector(mydict[key])\u001b[38;5;241m.\u001b[39manalyse_image()\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'mydict' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for key in mydict.keys():\n",
|
|
" mydict[key] = ammico.colors.ColorDetector(mydict[key]).analyse_image()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"These steps are required to convert the dictionary of dictionaries into a dictionary of lists, which can be converted into a pandas dataframe and exported to a csv file."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:37.033246Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:37.032797Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:37.063411Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:37.062776Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'mydict' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m outdict \u001b[38;5;241m=\u001b[39m mutils\u001b[38;5;241m.\u001b[39mappend_data_to_dict(\u001b[43mmydict\u001b[49m)\n\u001b[1;32m 2\u001b[0m df \u001b[38;5;241m=\u001b[39m mutils\u001b[38;5;241m.\u001b[39mdump_df(outdict)\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'mydict' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"outdict = mutils.append_data_to_dict(mydict)\n",
|
|
"df = mutils.dump_df(outdict)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check the dataframe:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:37.067081Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:37.066356Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:37.100735Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:37.100073Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'df' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m10\u001b[39m)\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df.head(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Write the csv file - here you should provide a file path and file name for the csv file to be written."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2023-07-07T14:23:37.104037Z",
|
|
"iopub.status.busy": "2023-07-07T14:23:37.103468Z",
|
|
"iopub.status.idle": "2023-07-07T14:23:37.138873Z",
|
|
"shell.execute_reply": "2023-07-07T14:23:37.138132Z"
|
|
}
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'df' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/content/drive/MyDrive/misinformation-data/data_out.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
|
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df.to_csv(\"/content/drive/MyDrive/misinformation-data/data_out.csv\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.17"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|