612 строки
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			612 строки
		
	
	
		
			17 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						||
 "cells": [
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Test area for AMITT code"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 2,
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
 | 
						||
     ]
 | 
						||
    },
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>amitt_id</th>\n",
 | 
						||
       "      <th>technique_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>T0007</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>T0015</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>T0018</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>T0043</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>T0053</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>135</th>\n",
 | 
						||
       "      <td>C00219</td>\n",
 | 
						||
       "      <td>T0025</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>136</th>\n",
 | 
						||
       "      <td>C00220</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>137</th>\n",
 | 
						||
       "      <td>C00221</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>138</th>\n",
 | 
						||
       "      <td>C00222</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>139</th>\n",
 | 
						||
       "      <td>C00223</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>898 rows × 2 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "    amitt_id technique_id\n",
 | 
						||
       "0     C00006        T0007\n",
 | 
						||
       "0     C00006        T0015\n",
 | 
						||
       "0     C00006        T0018\n",
 | 
						||
       "0     C00006        T0043\n",
 | 
						||
       "0     C00006        T0053\n",
 | 
						||
       "..       ...          ...\n",
 | 
						||
       "135   C00219        T0025\n",
 | 
						||
       "136   C00220             \n",
 | 
						||
       "137   C00221             \n",
 | 
						||
       "138   C00222             \n",
 | 
						||
       "139   C00223             \n",
 | 
						||
       "\n",
 | 
						||
       "[898 rows x 2 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 2,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import sqlite3 as sql\n",
 | 
						||
    "from generate_amitt_ttps import Amitt\n",
 | 
						||
    "\n",
 | 
						||
    "\n",
 | 
						||
    "# Generate AMITT datasets\n",
 | 
						||
    "amitt = Amitt()\n",
 | 
						||
    "\n",
 | 
						||
    "# Check which amitt variables we can see from here\n",
 | 
						||
    "print('{}'.format(vars(amitt).keys()))\n",
 | 
						||
    "vars(amitt)['cross_counterid_techniqueid']"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 26,
 | 
						||
   "metadata": {
 | 
						||
    "scrolled": true
 | 
						||
   },
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "df_phases loaded\n",
 | 
						||
      "df_techniques loaded\n",
 | 
						||
      "df_tasks loaded\n",
 | 
						||
      "df_incidents loaded\n",
 | 
						||
      "df_counters loaded\n",
 | 
						||
      "df_detections loaded\n",
 | 
						||
      "df_actors loaded\n",
 | 
						||
      "df_resources loaded\n",
 | 
						||
      "df_responsetypes loaded\n",
 | 
						||
      "df_metatechniques loaded\n",
 | 
						||
      "it loaded\n",
 | 
						||
      "df_tactics loaded\n",
 | 
						||
      "df_techniques_per_tactic loaded\n",
 | 
						||
      "df_counters_per_tactic loaded\n",
 | 
						||
      "phases not loaded\n",
 | 
						||
      "tactics not loaded\n",
 | 
						||
      "techniques not loaded\n",
 | 
						||
      "counters not loaded\n",
 | 
						||
      "metatechniques not loaded\n",
 | 
						||
      "actors not loaded\n",
 | 
						||
      "resources not loaded\n",
 | 
						||
      "num_tactics not loaded\n",
 | 
						||
      "cross_counterid_techniqueid loaded\n",
 | 
						||
      "cross_counterid_resourceid loaded\n",
 | 
						||
      "cross_counterid_actorid loaded\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Generate full sqlite database from the Amitt variables\n",
 | 
						||
    "conn = sql.connect('amitt_sqlite.db')\n",
 | 
						||
    "for tablename, table in vars(amitt).items():\n",
 | 
						||
    "    if type(table) == pd.core.frame.DataFrame:\n",
 | 
						||
    "        table.applymap(str).to_sql(tablename, conn)\n",
 | 
						||
    "        print('{} loaded'.format(tablename))\n",
 | 
						||
    "    else:\n",
 | 
						||
    "        print('{} not loaded'.format(tablename))"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 41,
 | 
						||
   "metadata": {
 | 
						||
    "scrolled": true
 | 
						||
   },
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id</th>\n",
 | 
						||
       "      <th>actor_id</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>C00006</td>\n",
 | 
						||
       "      <td>A033</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>A007</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>C00009</td>\n",
 | 
						||
       "      <td>A016</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>C00009</td>\n",
 | 
						||
       "      <td>A006</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>C00010</td>\n",
 | 
						||
       "      <td>A020</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>135</th>\n",
 | 
						||
       "      <td>C00219</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>136</th>\n",
 | 
						||
       "      <td>C00220</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>137</th>\n",
 | 
						||
       "      <td>C00221</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>138</th>\n",
 | 
						||
       "      <td>C00222</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>139</th>\n",
 | 
						||
       "      <td>C00223</td>\n",
 | 
						||
       "      <td></td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>166 rows × 2 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "         id actor_id\n",
 | 
						||
       "0    C00006     A033\n",
 | 
						||
       "1    C00008     A007\n",
 | 
						||
       "2    C00009     A016\n",
 | 
						||
       "2    C00009     A006\n",
 | 
						||
       "3    C00010     A020\n",
 | 
						||
       "..      ...      ...\n",
 | 
						||
       "135  C00219         \n",
 | 
						||
       "136  C00220         \n",
 | 
						||
       "137  C00221         \n",
 | 
						||
       "138  C00222         \n",
 | 
						||
       "139  C00223         \n",
 | 
						||
       "\n",
 | 
						||
       "[166 rows x 2 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 41,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "amitt.cross_counterid_actorid"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 3,
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>id</th>\n",
 | 
						||
       "      <th>technique_id</th>\n",
 | 
						||
       "      <th>Weight</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>TA01</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>TA06</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>TA08</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>T0006</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>C00008</td>\n",
 | 
						||
       "      <td>T0009</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>134</th>\n",
 | 
						||
       "      <td>C00216</td>\n",
 | 
						||
       "      <td>T0018</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>134</th>\n",
 | 
						||
       "      <td>C00216</td>\n",
 | 
						||
       "      <td>T0057</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>135</th>\n",
 | 
						||
       "      <td>C00219</td>\n",
 | 
						||
       "      <td>T0024</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>135</th>\n",
 | 
						||
       "      <td>C00219</td>\n",
 | 
						||
       "      <td>T0026</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>135</th>\n",
 | 
						||
       "      <td>C00219</td>\n",
 | 
						||
       "      <td>T0025</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>717 rows × 3 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "         id technique_id  Weight\n",
 | 
						||
       "1    C00008         TA01       1\n",
 | 
						||
       "1    C00008         TA06       1\n",
 | 
						||
       "1    C00008         TA08       1\n",
 | 
						||
       "1    C00008        T0006       1\n",
 | 
						||
       "1    C00008        T0009       1\n",
 | 
						||
       "..      ...          ...     ...\n",
 | 
						||
       "134  C00216        T0018       1\n",
 | 
						||
       "134  C00216        T0057       1\n",
 | 
						||
       "135  C00219        T0024       1\n",
 | 
						||
       "135  C00219        T0026       1\n",
 | 
						||
       "135  C00219        T0025       1\n",
 | 
						||
       "\n",
 | 
						||
       "[717 rows x 3 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 3,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "ct = amitt.cross_counterid_techniqueid\n",
 | 
						||
    "ct['Weight'] = 1\n",
 | 
						||
    "ct = ct[ct['technique_id'].str.len() > 0]\n",
 | 
						||
    "ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
 | 
						||
    "ct"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Fix the problem with Excel file changes\n",
 | 
						||
    "\n",
 | 
						||
    "Background: AMITT's master dataset is in an Excel file. Changes to this file don't show up in GitHub, so it's difficult to tell what has changed between versions. The code below checks for those differences - use it repeatedly until the versions align."
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 141,
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stderr",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
 | 
						||
      "  pd.set_option('display.max_colwidth', -1)\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import sqlite3 as sql\n",
 | 
						||
    "from generate_amitt_ttps import Amitt\n",
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import numpy as np\n",
 | 
						||
    "import os\n",
 | 
						||
    "from sklearn.feature_extraction.text import CountVectorizer\n",
 | 
						||
    "pd.set_option('display.max_rows', 1000)\n",
 | 
						||
    "pd.set_option('display.max_colwidth', -1)\n",
 | 
						||
    "\n",
 | 
						||
    "newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
 | 
						||
    "oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
 | 
						||
    "\n",
 | 
						||
    "# Load dfs from file\n",
 | 
						||
    "newdfs = {}\n",
 | 
						||
    "newxlsx = pd.ExcelFile(newfile)\n",
 | 
						||
    "for sheetname in newxlsx.sheet_names:\n",
 | 
						||
    "    newdfs[sheetname] = newxlsx.parse(sheetname)\n",
 | 
						||
    "    newdfs[sheetname].fillna('', inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "olddfs = {}\n",
 | 
						||
    "oldxlsx = pd.ExcelFile(oldfile)\n",
 | 
						||
    "for sheetname in oldxlsx.sheet_names:\n",
 | 
						||
    "    olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
 | 
						||
    "    olddfs[sheetname].fillna('', inplace=True)\n",
 | 
						||
    "\n",
 | 
						||
    "addedtables = newdfs.keys() - olddfs.keys()\n",
 | 
						||
    "losttables = olddfs.keys() - newdfs.keys()\n",
 | 
						||
    "if len(addedtables) + len(losttables) > 0:\n",
 | 
						||
    "    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
 | 
						||
    "\n",
 | 
						||
    "def investigate_table(table):\n",
 | 
						||
    "    print('\\n\\nTable {} is changed'.format(table))\n",
 | 
						||
    "    # Column headings\n",
 | 
						||
    "    coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
 | 
						||
    "    if len(coldiffs) > 0:\n",
 | 
						||
    "        print('column differences: {}'.format(coldiffs))\n",
 | 
						||
    "    # length\n",
 | 
						||
    "    if len(newdfs[table]) != len(olddfs[table]):\n",
 | 
						||
    "        print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
 | 
						||
    "\n",
 | 
						||
    "    # column by column\n",
 | 
						||
    "    for column in newdfs[table].columns:\n",
 | 
						||
    "        coldiffs = newdfs[table][column] != olddfs[table][column]\n",
 | 
						||
    "        if len(newdfs[table][coldiffs]) > 0:\n",
 | 
						||
    "            print('Differences in column {}'.format(column))\n",
 | 
						||
    "    return\n",
 | 
						||
    "\n",
 | 
						||
    "for table in newdfs.keys():\n",
 | 
						||
    "    if newdfs[table].equals(olddfs[table]) == False:\n",
 | 
						||
    "        investigate_table(table)        "
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 142,
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>amitt_id</th>\n",
 | 
						||
       "      <th>new</th>\n",
 | 
						||
       "      <th>old</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "Empty DataFrame\n",
 | 
						||
       "Columns: [amitt_id, new, old]\n",
 | 
						||
       "Index: []"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 142,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "# Look at individual table differences\n",
 | 
						||
    "table = 'countermeasures'\n",
 | 
						||
    "column = 'summary'\n",
 | 
						||
    "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
 | 
						||
    "diffcols = pd.DataFrame()\n",
 | 
						||
    "diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
 | 
						||
    "diffcols['new'] = newdfs[table][coldiffs][column]\n",
 | 
						||
    "diffcols['old'] = olddfs[table][coldiffs][column]\n",
 | 
						||
    "diffcols[diffcols['old'] != '']"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": null,
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [],
 | 
						||
   "source": []
 | 
						||
  }
 | 
						||
 ],
 | 
						||
 "metadata": {
 | 
						||
  "kernelspec": {
 | 
						||
   "display_name": "Python 3",
 | 
						||
   "language": "python",
 | 
						||
   "name": "python3"
 | 
						||
  },
 | 
						||
  "language_info": {
 | 
						||
   "codemirror_mode": {
 | 
						||
    "name": "ipython",
 | 
						||
    "version": 3
 | 
						||
   },
 | 
						||
   "file_extension": ".py",
 | 
						||
   "mimetype": "text/x-python",
 | 
						||
   "name": "python",
 | 
						||
   "nbconvert_exporter": "python",
 | 
						||
   "pygments_lexer": "ipython3",
 | 
						||
   "version": "3.8.3"
 | 
						||
  }
 | 
						||
 },
 | 
						||
 "nbformat": 4,
 | 
						||
 "nbformat_minor": 4
 | 
						||
}
 |