612 строки
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			612 строки
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | ||
|  "cells": [
 | ||
|   {
 | ||
|    "cell_type": "markdown",
 | ||
|    "metadata": {},
 | ||
|    "source": [
 | ||
|     "# Test area for AMITT code"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 2,
 | ||
|    "metadata": {},
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "name": "stdout",
 | ||
|      "output_type": "stream",
 | ||
|      "text": [
 | ||
|       "dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
 | ||
|      ]
 | ||
|     },
 | ||
|     {
 | ||
|      "data": {
 | ||
|       "text/html": [
 | ||
|        "<div>\n",
 | ||
|        "<style scoped>\n",
 | ||
|        "    .dataframe tbody tr th:only-of-type {\n",
 | ||
|        "        vertical-align: middle;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe tbody tr th {\n",
 | ||
|        "        vertical-align: top;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe thead th {\n",
 | ||
|        "        text-align: right;\n",
 | ||
|        "    }\n",
 | ||
|        "</style>\n",
 | ||
|        "<table border=\"1\" class=\"dataframe\">\n",
 | ||
|        "  <thead>\n",
 | ||
|        "    <tr style=\"text-align: right;\">\n",
 | ||
|        "      <th></th>\n",
 | ||
|        "      <th>amitt_id</th>\n",
 | ||
|        "      <th>technique_id</th>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </thead>\n",
 | ||
|        "  <tbody>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>T0007</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>T0015</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>T0018</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>T0043</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>T0053</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>...</th>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>135</th>\n",
 | ||
|        "      <td>C00219</td>\n",
 | ||
|        "      <td>T0025</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>136</th>\n",
 | ||
|        "      <td>C00220</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>137</th>\n",
 | ||
|        "      <td>C00221</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>138</th>\n",
 | ||
|        "      <td>C00222</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>139</th>\n",
 | ||
|        "      <td>C00223</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </tbody>\n",
 | ||
|        "</table>\n",
 | ||
|        "<p>898 rows × 2 columns</p>\n",
 | ||
|        "</div>"
 | ||
|       ],
 | ||
|       "text/plain": [
 | ||
|        "    amitt_id technique_id\n",
 | ||
|        "0     C00006        T0007\n",
 | ||
|        "0     C00006        T0015\n",
 | ||
|        "0     C00006        T0018\n",
 | ||
|        "0     C00006        T0043\n",
 | ||
|        "0     C00006        T0053\n",
 | ||
|        "..       ...          ...\n",
 | ||
|        "135   C00219        T0025\n",
 | ||
|        "136   C00220             \n",
 | ||
|        "137   C00221             \n",
 | ||
|        "138   C00222             \n",
 | ||
|        "139   C00223             \n",
 | ||
|        "\n",
 | ||
|        "[898 rows x 2 columns]"
 | ||
|       ]
 | ||
|      },
 | ||
|      "execution_count": 2,
 | ||
|      "metadata": {},
 | ||
|      "output_type": "execute_result"
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "import pandas as pd\n",
 | ||
|     "import sqlite3 as sql\n",
 | ||
|     "from generate_amitt_ttps import Amitt\n",
 | ||
|     "\n",
 | ||
|     "\n",
 | ||
|     "# Generate AMITT datasets\n",
 | ||
|     "amitt = Amitt()\n",
 | ||
|     "\n",
 | ||
|     "# Check which amitt variables we can see from here\n",
 | ||
|     "print('{}'.format(vars(amitt).keys()))\n",
 | ||
|     "vars(amitt)['cross_counterid_techniqueid']"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 26,
 | ||
|    "metadata": {
 | ||
|     "scrolled": true
 | ||
|    },
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "name": "stdout",
 | ||
|      "output_type": "stream",
 | ||
|      "text": [
 | ||
|       "df_phases loaded\n",
 | ||
|       "df_techniques loaded\n",
 | ||
|       "df_tasks loaded\n",
 | ||
|       "df_incidents loaded\n",
 | ||
|       "df_counters loaded\n",
 | ||
|       "df_detections loaded\n",
 | ||
|       "df_actors loaded\n",
 | ||
|       "df_resources loaded\n",
 | ||
|       "df_responsetypes loaded\n",
 | ||
|       "df_metatechniques loaded\n",
 | ||
|       "it loaded\n",
 | ||
|       "df_tactics loaded\n",
 | ||
|       "df_techniques_per_tactic loaded\n",
 | ||
|       "df_counters_per_tactic loaded\n",
 | ||
|       "phases not loaded\n",
 | ||
|       "tactics not loaded\n",
 | ||
|       "techniques not loaded\n",
 | ||
|       "counters not loaded\n",
 | ||
|       "metatechniques not loaded\n",
 | ||
|       "actors not loaded\n",
 | ||
|       "resources not loaded\n",
 | ||
|       "num_tactics not loaded\n",
 | ||
|       "cross_counterid_techniqueid loaded\n",
 | ||
|       "cross_counterid_resourceid loaded\n",
 | ||
|       "cross_counterid_actorid loaded\n"
 | ||
|      ]
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "# Generate full sqlite database from the Amitt variables\n",
 | ||
|     "conn = sql.connect('amitt_sqlite.db')\n",
 | ||
|     "for tablename, table in vars(amitt).items():\n",
 | ||
|     "    if type(table) == pd.core.frame.DataFrame:\n",
 | ||
|     "        table.applymap(str).to_sql(tablename, conn)\n",
 | ||
|     "        print('{} loaded'.format(tablename))\n",
 | ||
|     "    else:\n",
 | ||
|     "        print('{} not loaded'.format(tablename))"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 41,
 | ||
|    "metadata": {
 | ||
|     "scrolled": true
 | ||
|    },
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "data": {
 | ||
|       "text/html": [
 | ||
|        "<div>\n",
 | ||
|        "<style scoped>\n",
 | ||
|        "    .dataframe tbody tr th:only-of-type {\n",
 | ||
|        "        vertical-align: middle;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe tbody tr th {\n",
 | ||
|        "        vertical-align: top;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe thead th {\n",
 | ||
|        "        text-align: right;\n",
 | ||
|        "    }\n",
 | ||
|        "</style>\n",
 | ||
|        "<table border=\"1\" class=\"dataframe\">\n",
 | ||
|        "  <thead>\n",
 | ||
|        "    <tr style=\"text-align: right;\">\n",
 | ||
|        "      <th></th>\n",
 | ||
|        "      <th>id</th>\n",
 | ||
|        "      <th>actor_id</th>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </thead>\n",
 | ||
|        "  <tbody>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>0</th>\n",
 | ||
|        "      <td>C00006</td>\n",
 | ||
|        "      <td>A033</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>A007</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>2</th>\n",
 | ||
|        "      <td>C00009</td>\n",
 | ||
|        "      <td>A016</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>2</th>\n",
 | ||
|        "      <td>C00009</td>\n",
 | ||
|        "      <td>A006</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>3</th>\n",
 | ||
|        "      <td>C00010</td>\n",
 | ||
|        "      <td>A020</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>...</th>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>135</th>\n",
 | ||
|        "      <td>C00219</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>136</th>\n",
 | ||
|        "      <td>C00220</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>137</th>\n",
 | ||
|        "      <td>C00221</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>138</th>\n",
 | ||
|        "      <td>C00222</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>139</th>\n",
 | ||
|        "      <td>C00223</td>\n",
 | ||
|        "      <td></td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </tbody>\n",
 | ||
|        "</table>\n",
 | ||
|        "<p>166 rows × 2 columns</p>\n",
 | ||
|        "</div>"
 | ||
|       ],
 | ||
|       "text/plain": [
 | ||
|        "         id actor_id\n",
 | ||
|        "0    C00006     A033\n",
 | ||
|        "1    C00008     A007\n",
 | ||
|        "2    C00009     A016\n",
 | ||
|        "2    C00009     A006\n",
 | ||
|        "3    C00010     A020\n",
 | ||
|        "..      ...      ...\n",
 | ||
|        "135  C00219         \n",
 | ||
|        "136  C00220         \n",
 | ||
|        "137  C00221         \n",
 | ||
|        "138  C00222         \n",
 | ||
|        "139  C00223         \n",
 | ||
|        "\n",
 | ||
|        "[166 rows x 2 columns]"
 | ||
|       ]
 | ||
|      },
 | ||
|      "execution_count": 41,
 | ||
|      "metadata": {},
 | ||
|      "output_type": "execute_result"
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "amitt.cross_counterid_actorid"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 3,
 | ||
|    "metadata": {},
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "data": {
 | ||
|       "text/html": [
 | ||
|        "<div>\n",
 | ||
|        "<style scoped>\n",
 | ||
|        "    .dataframe tbody tr th:only-of-type {\n",
 | ||
|        "        vertical-align: middle;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe tbody tr th {\n",
 | ||
|        "        vertical-align: top;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe thead th {\n",
 | ||
|        "        text-align: right;\n",
 | ||
|        "    }\n",
 | ||
|        "</style>\n",
 | ||
|        "<table border=\"1\" class=\"dataframe\">\n",
 | ||
|        "  <thead>\n",
 | ||
|        "    <tr style=\"text-align: right;\">\n",
 | ||
|        "      <th></th>\n",
 | ||
|        "      <th>id</th>\n",
 | ||
|        "      <th>technique_id</th>\n",
 | ||
|        "      <th>Weight</th>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </thead>\n",
 | ||
|        "  <tbody>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>TA01</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>TA06</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>TA08</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>T0006</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>1</th>\n",
 | ||
|        "      <td>C00008</td>\n",
 | ||
|        "      <td>T0009</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>...</th>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "      <td>...</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>134</th>\n",
 | ||
|        "      <td>C00216</td>\n",
 | ||
|        "      <td>T0018</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>134</th>\n",
 | ||
|        "      <td>C00216</td>\n",
 | ||
|        "      <td>T0057</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>135</th>\n",
 | ||
|        "      <td>C00219</td>\n",
 | ||
|        "      <td>T0024</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>135</th>\n",
 | ||
|        "      <td>C00219</td>\n",
 | ||
|        "      <td>T0026</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "    <tr>\n",
 | ||
|        "      <th>135</th>\n",
 | ||
|        "      <td>C00219</td>\n",
 | ||
|        "      <td>T0025</td>\n",
 | ||
|        "      <td>1</td>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </tbody>\n",
 | ||
|        "</table>\n",
 | ||
|        "<p>717 rows × 3 columns</p>\n",
 | ||
|        "</div>"
 | ||
|       ],
 | ||
|       "text/plain": [
 | ||
|        "         id technique_id  Weight\n",
 | ||
|        "1    C00008         TA01       1\n",
 | ||
|        "1    C00008         TA06       1\n",
 | ||
|        "1    C00008         TA08       1\n",
 | ||
|        "1    C00008        T0006       1\n",
 | ||
|        "1    C00008        T0009       1\n",
 | ||
|        "..      ...          ...     ...\n",
 | ||
|        "134  C00216        T0018       1\n",
 | ||
|        "134  C00216        T0057       1\n",
 | ||
|        "135  C00219        T0024       1\n",
 | ||
|        "135  C00219        T0026       1\n",
 | ||
|        "135  C00219        T0025       1\n",
 | ||
|        "\n",
 | ||
|        "[717 rows x 3 columns]"
 | ||
|       ]
 | ||
|      },
 | ||
|      "execution_count": 3,
 | ||
|      "metadata": {},
 | ||
|      "output_type": "execute_result"
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "ct = amitt.cross_counterid_techniqueid\n",
 | ||
|     "ct['Weight'] = 1\n",
 | ||
|     "ct = ct[ct['technique_id'].str.len() > 0]\n",
 | ||
|     "ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
 | ||
|     "ct"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "markdown",
 | ||
|    "metadata": {},
 | ||
|    "source": [
 | ||
|     "# Fix the problem with Excel file changes\n",
 | ||
|     "\n",
 | ||
|     "Background: AMITT's master dataset is kept in an Excel file. Changes to it don't show up in GitHub, so it's difficult to tell what has changed between versions. The code below checks for those differences - run it repeatedly until the versions align."
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 141,
 | ||
|    "metadata": {},
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "name": "stderr",
 | ||
|      "output_type": "stream",
 | ||
|      "text": [
 | ||
|       "<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
 | ||
|       "  pd.set_option('display.max_colwidth', -1)\n"
 | ||
|      ]
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "import pandas as pd\n",
 | ||
|     "import sqlite3 as sql\n",
 | ||
|     "from generate_amitt_ttps import Amitt\n",
 | ||
|     "import pandas as pd\n",
 | ||
|     "import numpy as np\n",
 | ||
|     "import os\n",
 | ||
|     "from sklearn.feature_extraction.text import CountVectorizer\n",
 | ||
|     "pd.set_option('display.max_rows', 1000)\n",
 | ||
|     "pd.set_option('display.max_colwidth', -1)\n",
 | ||
|     "\n",
 | ||
|     "newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
 | ||
|     "oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
 | ||
|     "\n",
 | ||
|     "# Load dfs from file\n",
 | ||
|     "newdfs = {}\n",
 | ||
|     "newxlsx = pd.ExcelFile(newfile)\n",
 | ||
|     "for sheetname in newxlsx.sheet_names:\n",
 | ||
|     "    newdfs[sheetname] = newxlsx.parse(sheetname)\n",
 | ||
|     "    newdfs[sheetname].fillna('', inplace=True)\n",
 | ||
|     "\n",
 | ||
|     "olddfs = {}\n",
 | ||
|     "oldxlsx = pd.ExcelFile(oldfile)\n",
 | ||
|     "for sheetname in oldxlsx.sheet_names:\n",
 | ||
|     "    olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
 | ||
|     "    olddfs[sheetname].fillna('', inplace=True)\n",
 | ||
|     "\n",
 | ||
|     "addedtables = newdfs.keys() - olddfs.keys()\n",
 | ||
|     "losttables = olddfs.keys() - newdfs.keys()\n",
 | ||
|     "if len(addedtables) + len(losttables) > 0:\n",
 | ||
|     "    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
 | ||
|     "\n",
 | ||
|     "def investigate_table(table):\n",
 | ||
|     "    print('\\n\\nTable {} is changed'.format(table))\n",
 | ||
|     "    # Column headings\n",
 | ||
|     "    coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
 | ||
|     "    if len(coldiffs) > 0:\n",
 | ||
|     "        print('column differences: {}'.format(coldiffs))\n",
 | ||
|     "    # length\n",
 | ||
|     "    if len(newdfs[table]) != len(olddfs[table]):\n",
 | ||
|     "        print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
 | ||
|     "\n",
 | ||
|     "    # column by column\n",
 | ||
|     "    for column in newdfs[table].columns:\n",
 | ||
|     "        coldiffs = newdfs[table][column] != olddfs[table][column]\n",
 | ||
|     "        if len(newdfs[table][coldiffs]) > 0:\n",
 | ||
|     "            print('Differences in column {}'.format(column))\n",
 | ||
|     "    return\n",
 | ||
|     "\n",
 | ||
|     "for table in newdfs.keys():\n",
 | ||
|     "    if newdfs[table].equals(olddfs[table]) == False:\n",
 | ||
|     "        investigate_table(table)        "
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": 142,
 | ||
|    "metadata": {},
 | ||
|    "outputs": [
 | ||
|     {
 | ||
|      "data": {
 | ||
|       "text/html": [
 | ||
|        "<div>\n",
 | ||
|        "<style scoped>\n",
 | ||
|        "    .dataframe tbody tr th:only-of-type {\n",
 | ||
|        "        vertical-align: middle;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe tbody tr th {\n",
 | ||
|        "        vertical-align: top;\n",
 | ||
|        "    }\n",
 | ||
|        "\n",
 | ||
|        "    .dataframe thead th {\n",
 | ||
|        "        text-align: right;\n",
 | ||
|        "    }\n",
 | ||
|        "</style>\n",
 | ||
|        "<table border=\"1\" class=\"dataframe\">\n",
 | ||
|        "  <thead>\n",
 | ||
|        "    <tr style=\"text-align: right;\">\n",
 | ||
|        "      <th></th>\n",
 | ||
|        "      <th>amitt_id</th>\n",
 | ||
|        "      <th>new</th>\n",
 | ||
|        "      <th>old</th>\n",
 | ||
|        "    </tr>\n",
 | ||
|        "  </thead>\n",
 | ||
|        "  <tbody>\n",
 | ||
|        "  </tbody>\n",
 | ||
|        "</table>\n",
 | ||
|        "</div>"
 | ||
|       ],
 | ||
|       "text/plain": [
 | ||
|        "Empty DataFrame\n",
 | ||
|        "Columns: [amitt_id, new, old]\n",
 | ||
|        "Index: []"
 | ||
|       ]
 | ||
|      },
 | ||
|      "execution_count": 142,
 | ||
|      "metadata": {},
 | ||
|      "output_type": "execute_result"
 | ||
|     }
 | ||
|    ],
 | ||
|    "source": [
 | ||
|     "# Look at individual table differences\n",
 | ||
|     "table = 'countermeasures'\n",
 | ||
|     "column = 'summary'\n",
 | ||
|     "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
 | ||
|     "diffcols = pd.DataFrame()\n",
 | ||
|     "diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
 | ||
|     "diffcols['new'] = newdfs[table][coldiffs][column]\n",
 | ||
|     "diffcols['old'] = olddfs[table][coldiffs][column]\n",
 | ||
|     "diffcols[diffcols['old'] != '']"
 | ||
|    ]
 | ||
|   },
 | ||
|   {
 | ||
|    "cell_type": "code",
 | ||
|    "execution_count": null,
 | ||
|    "metadata": {},
 | ||
|    "outputs": [],
 | ||
|    "source": []
 | ||
|   }
 | ||
|  ],
 | ||
|  "metadata": {
 | ||
|   "kernelspec": {
 | ||
|    "display_name": "Python 3",
 | ||
|    "language": "python",
 | ||
|    "name": "python3"
 | ||
|   },
 | ||
|   "language_info": {
 | ||
|    "codemirror_mode": {
 | ||
|     "name": "ipython",
 | ||
|     "version": 3
 | ||
|    },
 | ||
|    "file_extension": ".py",
 | ||
|    "mimetype": "text/x-python",
 | ||
|    "name": "python",
 | ||
|    "nbconvert_exporter": "python",
 | ||
|    "pygments_lexer": "ipython3",
 | ||
|    "version": "3.8.3"
 | ||
|   }
 | ||
|  },
 | ||
|  "nbformat": 4,
 | ||
|  "nbformat_minor": 4
 | ||
| }
 | 
