AMITT/HTML_GENERATING_CODE/.ipynb_checkpoints/AMITT_code_testbed-checkpoint.ipynb
2021-06-25 08:52:18 +01:00

934 строки
29 KiB
Plaintext
Исходник Ответственный История

Этот файл содержит неоднозначные символы Юникода

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test area for AMITT code"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>technique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>C00220</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>C00221</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>C00222</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>C00223</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>898 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" amitt_id technique_id\n",
"0 C00006 T0007\n",
"0 C00006 T0015\n",
"0 C00006 T0018\n",
"0 C00006 T0043\n",
"0 C00006 T0053\n",
".. ... ...\n",
"135 C00219 T0025\n",
"136 C00220 \n",
"137 C00221 \n",
"138 C00222 \n",
"139 C00223 \n",
"\n",
"[898 rows x 2 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import sqlite3 as sql\n",
"from generate_amitt_ttps import Amitt\n",
"\n",
"\n",
"# Generate AMITT datasets\n",
"amitt = Amitt()\n",
"\n",
"# Check which amitt variables we can see from here\n",
"print('{}'.format(vars(amitt).keys()))\n",
"vars(amitt)['cross_counterid_techniqueid']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>technique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>C00220</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>C00221</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>C00222</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>C00223</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>898 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" amitt_id technique_id\n",
"0 C00006 T0007\n",
"0 C00006 T0015\n",
"0 C00006 T0018\n",
"0 C00006 T0043\n",
"0 C00006 T0053\n",
".. ... ...\n",
"135 C00219 T0025\n",
"136 C00220 \n",
"137 C00221 \n",
"138 C00222 \n",
"139 C00223 \n",
"\n",
"[898 rows x 2 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"amitt.cross_counterid_techniqueid"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>tactic_id</th>\n",
" <th>name</th>\n",
" <th>summary</th>\n",
" <th>id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>T0001</td>\n",
" <td>TA01</td>\n",
" <td>5Ds (dismiss, distort, distract, dismay, divide)</td>\n",
" <td>Nimmo's \"4Ds of propaganda\": dismiss, distort,...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>T0002</td>\n",
" <td>TA01</td>\n",
" <td>Facilitate State Propaganda</td>\n",
" <td>Organize citizens around pro-state messaging. ...</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>T0003</td>\n",
" <td>TA01</td>\n",
" <td>Leverage Existing Narratives</td>\n",
" <td>Use or adapt existing narrative themes, where ...</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>T0004</td>\n",
" <td>TA01</td>\n",
" <td>Competing Narratives</td>\n",
" <td>Advance competing narratives connected to same...</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>T0005</td>\n",
" <td>TA02</td>\n",
" <td>Center of Gravity Analysis</td>\n",
" <td>Recon/research to identify \"the source of powe...</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>T0060</td>\n",
" <td>TA11</td>\n",
" <td>Continue to amplify</td>\n",
" <td>continue narrative or message amplification af...</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>T0061</td>\n",
" <td>TA10</td>\n",
" <td>Sell merchandising</td>\n",
" <td>Sell hats, t-shirts, flags and other branded c...</td>\n",
" <td>61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>T0062</td>\n",
" <td>TA12</td>\n",
" <td>Behaviour changes</td>\n",
" <td>Monitor and evaluate behaviour changes from mi...</td>\n",
" <td>62</td>\n",
" </tr>\n",
" <tr>\n",
" <th>62</th>\n",
" <td>T0063</td>\n",
" <td>TA12</td>\n",
" <td>Message reach</td>\n",
" <td>Monitor and evaluate message reach in misinfor...</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>63</th>\n",
" <td>T0064</td>\n",
" <td>TA12</td>\n",
" <td>Social media engagement</td>\n",
" <td>Monitor and evaluate social media engagement i...</td>\n",
" <td>64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>64 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" amitt_id tactic_id name \\\n",
"0 T0001 TA01 5Ds (dismiss, distort, distract, dismay, divide) \n",
"1 T0002 TA01 Facilitate State Propaganda \n",
"2 T0003 TA01 Leverage Existing Narratives \n",
"3 T0004 TA01 Competing Narratives \n",
"4 T0005 TA02 Center of Gravity Analysis \n",
".. ... ... ... \n",
"59 T0060 TA11 Continue to amplify \n",
"60 T0061 TA10 Sell merchandising \n",
"61 T0062 TA12 Behaviour changes \n",
"62 T0063 TA12 Message reach \n",
"63 T0064 TA12 Social media engagement \n",
"\n",
" summary id \n",
"0 Nimmo's \"4Ds of propaganda\": dismiss, distort,... 1 \n",
"1 Organize citizens around pro-state messaging. ... 2 \n",
"2 Use or adapt existing narrative themes, where ... 3 \n",
"3 Advance competing narratives connected to same... 4 \n",
"4 Recon/research to identify \"the source of powe... 5 \n",
".. ... .. \n",
"59 continue narrative or message amplification af... 60 \n",
"60 Sell hats, t-shirts, flags and other branded c... 61 \n",
"61 Monitor and evaluate behaviour changes from mi... 62 \n",
"62 Monitor and evaluate message reach in misinfor... 63 \n",
"63 Monitor and evaluate social media engagement i... 64 \n",
"\n",
"[64 rows x 5 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generate minimal sqlite database from the Amitt variables\n",
"conn = sql.connect('amittsite.sqlite')\n",
"\n",
"def add_table(dataframe, tablename, columns): \n",
" # Create sql table\n",
" colnames = ', '.join(['{} TEXT NOT NULL'.format(col) for col in columns])\n",
" conn.execute(\"DROP TABLE IF EXISTS {}\".format(tablename))\n",
" conn.execute('''CREATE TABLE {} (id INTEGER PRIMARY KEY AUTOINCREMENT, {});'''.format(tablename, colnames))\n",
" #populate table from dataframe\n",
" newtable = dataframe[columns].copy().applymap(str)\n",
" newtable['id'] = range(1,len(newtable)+1)\n",
" newtable.to_sql(tablename, conn, index=False, if_exists='append')\n",
" conn.commit()\n",
" return newtable\n",
"\n",
"#newtable = add_table(amitt.df_actortypes, 'actor_type', ['amitt_id', 'sector_id', 'framework_id', 'name', 'summary'])\n",
"# counter\n",
"newtable = add_table(amitt.df_counters, 'counter', ['amitt_id', 'tactic_id', 'metatechnique_id', 'name', 'summary'])\n",
"# dataset\n",
"newtable = add_table(amitt.df_frameworks, 'framework', ['amitt_id', 'name', 'summary'])\n",
"# incident\n",
"# metatechnique\n",
"newtable = add_table(amitt.df_metatechniques, 'metatechnique', ['amitt_id', 'name', 'summary'])\n",
"newtable = add_table(amitt.df_phases, 'phase', ['amitt_id', 'name', 'rank', 'summary'])\n",
"# playbook\n",
"# reference\n",
"# response_type\n",
"# sector\n",
"newtable = add_table(amitt.df_tactics, 'tactic', ['amitt_id', 'phase_id', 'name', 'rank', 'summary'])\n",
"newtable = add_table(amitt.df_tasks, 'task', ['amitt_id', 'tactic_id', 'framework_id', 'name', 'summary'])\n",
"newtable = add_table(amitt.df_techniques, 'technique', ['amitt_id', 'tactic_id', 'name', 'summary'])\n",
"# techniques_counters\n",
"\n",
"conn.execute(\"DROP TABLE IF EXISTS {}\".format('user'))\n",
"conn.execute('''CREATE TABLE user (id INTEGER PRIMARY KEY AUTOINCREMENT, username TEXT NOT NULL UNIQUE, password TEXT NOT NULL);''')\n",
"\n",
"conn.close()\n",
"newtable"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df_phases loaded\n",
"df_techniques loaded\n",
"df_tasks loaded\n",
"df_incidents loaded\n",
"df_counters loaded\n",
"df_detections loaded\n",
"df_actors loaded\n",
"df_resources loaded\n",
"df_responsetypes loaded\n",
"df_metatechniques loaded\n",
"it loaded\n",
"df_tactics loaded\n",
"df_techniques_per_tactic loaded\n",
"df_counters_per_tactic loaded\n",
"phases not loaded\n",
"tactics not loaded\n",
"techniques not loaded\n",
"counters not loaded\n",
"metatechniques not loaded\n",
"actors not loaded\n",
"resources not loaded\n",
"num_tactics not loaded\n",
"cross_counterid_techniqueid loaded\n",
"cross_counterid_resourceid loaded\n",
"cross_counterid_actorid loaded\n"
]
}
],
"source": [
"# Generate full sqlite database from the Amitt variables\n",
"conn = sql.connect('amitt_sqlite.db')\n",
"for tablename, table in vars(amitt).items():\n",
" if type(table) == pd.core.frame.DataFrame:\n",
" table.applymap(str).to_sql(tablename, conn)\n",
" print('{} loaded'.format(tablename))\n",
" else:\n",
" print('{} not loaded'.format(tablename))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>actor_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>A033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>A007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C00009</td>\n",
" <td>A016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C00009</td>\n",
" <td>A006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>C00010</td>\n",
" <td>A020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>C00220</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>C00221</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>C00222</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>C00223</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>166 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id actor_id\n",
"0 C00006 A033\n",
"1 C00008 A007\n",
"2 C00009 A016\n",
"2 C00009 A006\n",
"3 C00010 A020\n",
".. ... ...\n",
"135 C00219 \n",
"136 C00220 \n",
"137 C00221 \n",
"138 C00222 \n",
"139 C00223 \n",
"\n",
"[166 rows x 2 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"amitt.cross_counterid_actorid"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>technique_id</th>\n",
" <th>Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA06</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA08</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>T0006</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>T0009</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>C00216</td>\n",
" <td>T0018</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>C00216</td>\n",
" <td>T0057</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0024</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0026</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0025</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>717 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" id technique_id Weight\n",
"1 C00008 TA01 1\n",
"1 C00008 TA06 1\n",
"1 C00008 TA08 1\n",
"1 C00008 T0006 1\n",
"1 C00008 T0009 1\n",
".. ... ... ...\n",
"134 C00216 T0018 1\n",
"134 C00216 T0057 1\n",
"135 C00219 T0024 1\n",
"135 C00219 T0026 1\n",
"135 C00219 T0025 1\n",
"\n",
"[717 rows x 3 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ct = amitt.cross_counterid_techniqueid\n",
"ct['Weight'] = 1\n",
"ct = ct[ct['technique_id'].str.len() > 0]\n",
"ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
"ct"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# fix the problem with excelfile changes"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
" pd.set_option('display.max_colwidth', -1)\n"
]
}
],
"source": [
"import pandas as pd\n",
"import sqlite3 as sql\n",
"from generate_amitt_ttps import Amitt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"pd.set_option('display.max_rows', 1000)\n",
"pd.set_option('display.max_colwidth', -1)\n",
"\n",
"newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
"oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
"\n",
"# Load dfs from file\n",
"newdfs = {}\n",
"newxlsx = pd.ExcelFile(newfile)\n",
"for sheetname in newxlsx.sheet_names:\n",
" newdfs[sheetname] = newxlsx.parse(sheetname)\n",
" newdfs[sheetname].fillna('', inplace=True)\n",
"\n",
"olddfs = {}\n",
"oldxlsx = pd.ExcelFile(oldfile)\n",
"for sheetname in oldxlsx.sheet_names:\n",
" olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
" olddfs[sheetname].fillna('', inplace=True)\n",
"\n",
"addedtables = newdfs.keys() - olddfs.keys()\n",
"losttables = olddfs.keys() - newdfs.keys()\n",
"if len(addedtables) + len(losttables) > 0:\n",
" print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
"\n",
"def investigate_table(table):\n",
" print('\\n\\nTable {} is changed'.format(table))\n",
" # Column headings\n",
" coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
" if len(coldiffs) > 0:\n",
" print('column differences: {}'.format(coldiffs))\n",
" # length\n",
" if len(newdfs[table]) != len(olddfs[table]):\n",
" print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
"\n",
" # column by column\n",
" for column in newdfs[table].columns:\n",
" coldiffs = newdfs[table][column] != olddfs[table][column]\n",
" if len(newdfs[table][coldiffs]) > 0:\n",
" print('Differences in column {}'.format(column))\n",
" return\n",
"\n",
"for table in newdfs.keys():\n",
" if newdfs[table].equals(olddfs[table]) == False:\n",
" investigate_table(table) "
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>new</th>\n",
" <th>old</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [amitt_id, new, old]\n",
"Index: []"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look at individual table differences\n",
"table = 'countermeasures'\n",
"column = 'summary'\n",
"coldiffs = newdfs[table][column] != olddfs[table][column]\n",
"diffcols = pd.DataFrame()\n",
"diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
"diffcols['new'] = newdfs[table][coldiffs][column]\n",
"diffcols['old'] = olddfs[table][coldiffs][column]\n",
"diffcols[diffcols['old'] != '']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}