612 строки
17 KiB
Plaintext
612 строки
17 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Test area for AMITT code"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>amitt_id</th>\n",
|
||
" <th>technique_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>T0007</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>T0015</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>T0018</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>T0043</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>T0053</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>C00219</td>\n",
|
||
" <td>T0025</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>136</th>\n",
|
||
" <td>C00220</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>137</th>\n",
|
||
" <td>C00221</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>138</th>\n",
|
||
" <td>C00222</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>139</th>\n",
|
||
" <td>C00223</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>898 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" amitt_id technique_id\n",
|
||
"0 C00006 T0007\n",
|
||
"0 C00006 T0015\n",
|
||
"0 C00006 T0018\n",
|
||
"0 C00006 T0043\n",
|
||
"0 C00006 T0053\n",
|
||
".. ... ...\n",
|
||
"135 C00219 T0025\n",
|
||
"136 C00220 \n",
|
||
"137 C00221 \n",
|
||
"138 C00222 \n",
|
||
"139 C00223 \n",
|
||
"\n",
|
||
"[898 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import sqlite3 as sql\n",
|
||
"from generate_amitt_ttps import Amitt\n",
|
||
"\n",
|
||
"\n",
|
"# Build the AMITT object that carries the generated datasets\n",
"amitt = Amitt()\n",
"\n",
"# List the attribute names visible on the object, then show the\n",
"# counter-to-technique cross table as the cell result\n",
"amitt_attrs = vars(amitt)\n",
"print('{}'.format(amitt_attrs.keys()))\n",
"amitt_attrs['cross_counterid_techniqueid']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"df_phases loaded\n",
|
||
"df_techniques loaded\n",
|
||
"df_tasks loaded\n",
|
||
"df_incidents loaded\n",
|
||
"df_counters loaded\n",
|
||
"df_detections loaded\n",
|
||
"df_actors loaded\n",
|
||
"df_resources loaded\n",
|
||
"df_responsetypes loaded\n",
|
||
"df_metatechniques loaded\n",
|
||
"it loaded\n",
|
||
"df_tactics loaded\n",
|
||
"df_techniques_per_tactic loaded\n",
|
||
"df_counters_per_tactic loaded\n",
|
||
"phases not loaded\n",
|
||
"tactics not loaded\n",
|
||
"techniques not loaded\n",
|
||
"counters not loaded\n",
|
||
"metatechniques not loaded\n",
|
||
"actors not loaded\n",
|
||
"resources not loaded\n",
|
||
"num_tactics not loaded\n",
|
||
"cross_counterid_techniqueid loaded\n",
|
||
"cross_counterid_resourceid loaded\n",
|
||
"cross_counterid_actorid loaded\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Generate full sqlite database from the Amitt variables\n",
"conn = sql.connect('amitt_sqlite.db')\n",
"try:\n",
"    for tablename, table in vars(amitt).items():\n",
"        if isinstance(table, pd.DataFrame):\n",
"            # Every value is stringified before writing. if_exists='replace' lets\n",
"            # this cell be re-run against an existing amitt_sqlite.db; the default\n",
"            # ('fail') makes to_sql raise ValueError for tables already present.\n",
"            table.applymap(str).to_sql(tablename, conn, if_exists='replace')\n",
"            print('{} loaded'.format(tablename))\n",
"        else:\n",
"            # Non-DataFrame attributes (lists, counts, ...) are skipped\n",
"            print('{} not loaded'.format(tablename))\n",
"finally:\n",
"    # Release the database file even if one of the tables fails to write\n",
"    conn.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>actor_id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C00006</td>\n",
|
||
" <td>A033</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>A007</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C00009</td>\n",
|
||
" <td>A016</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C00009</td>\n",
|
||
" <td>A006</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>C00010</td>\n",
|
||
" <td>A020</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>C00219</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>136</th>\n",
|
||
" <td>C00220</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>137</th>\n",
|
||
" <td>C00221</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>138</th>\n",
|
||
" <td>C00222</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>139</th>\n",
|
||
" <td>C00223</td>\n",
|
||
" <td></td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>166 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id actor_id\n",
|
||
"0 C00006 A033\n",
|
||
"1 C00008 A007\n",
|
||
"2 C00009 A016\n",
|
||
"2 C00009 A006\n",
|
||
"3 C00010 A020\n",
|
||
".. ... ...\n",
|
||
"135 C00219 \n",
|
||
"136 C00220 \n",
|
||
"137 C00221 \n",
|
||
"138 C00222 \n",
|
||
"139 C00223 \n",
|
||
"\n",
|
||
"[166 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Counter-to-actortype cross table. The attribute list printed by the first\n",
"# cell names it cross_counterid_actortypeid, not cross_counterid_actorid.\n",
"amitt.cross_counterid_actortypeid"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>technique_id</th>\n",
|
||
" <th>Weight</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>TA01</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>TA06</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>TA08</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>T0006</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>C00008</td>\n",
|
||
" <td>T0009</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>134</th>\n",
|
||
" <td>C00216</td>\n",
|
||
" <td>T0018</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>134</th>\n",
|
||
" <td>C00216</td>\n",
|
||
" <td>T0057</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>C00219</td>\n",
|
||
" <td>T0024</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>C00219</td>\n",
|
||
" <td>T0026</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>135</th>\n",
|
||
" <td>C00219</td>\n",
|
||
" <td>T0025</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>717 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id technique_id Weight\n",
|
||
"1 C00008 TA01 1\n",
|
||
"1 C00008 TA06 1\n",
|
||
"1 C00008 TA08 1\n",
|
||
"1 C00008 T0006 1\n",
|
||
"1 C00008 T0009 1\n",
|
||
".. ... ... ...\n",
|
||
"134 C00216 T0018 1\n",
|
||
"134 C00216 T0057 1\n",
|
||
"135 C00219 T0024 1\n",
|
||
"135 C00219 T0026 1\n",
|
||
"135 C00219 T0025 1\n",
|
||
"\n",
|
||
"[717 rows x 3 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Edge list of counter -> technique links, written out for the visualisations.\n",
"# assign() returns a new frame, so amitt.cross_counterid_techniqueid itself\n",
"# does not gain a Weight column as a side effect of running this cell.\n",
"ct = amitt.cross_counterid_techniqueid.assign(Weight=1)\n",
"# Keep only rows that actually name a technique (non-empty technique_id)\n",
"ct = ct[ct['technique_id'].str.len() > 0]\n",
"ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
"ct"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
"# Fix the problem with Excel file changes\n",
|
||
"\n",
|
"Background: AMITT's master dataset is in an Excel file. Changes to it don't show up in GitHub diffs, so it's difficult to tell what has changed between versions. The code below checks for those differences - use it repeatedly until the versions align. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 141,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
|
||
" pd.set_option('display.max_colwidth', -1)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import sqlite3 as sql\n",
|
||
"from generate_amitt_ttps import Amitt\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
"# Display options: show up to 1000 rows and never truncate cell text.\n",
"# None is the supported way to lift the column-width limit; passing -1\n",
"# triggers a FutureWarning from pandas 1.0 onwards.\n",
"pd.set_option('display.max_rows', 1000)\n",
"pd.set_option('display.max_colwidth', None)\n",
|
||
"\n",
|
"newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
"oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
"\n",
"def load_sheets(xlsxfile):\n",
"    \"\"\"Return {sheet name: DataFrame} for every sheet in xlsxfile, with missing cells as ''.\"\"\"\n",
"    # The context manager closes the workbook once all sheets have been parsed\n",
"    with pd.ExcelFile(xlsxfile) as xlsx:\n",
"        return {sheetname: xlsx.parse(sheetname).fillna('')\n",
"                for sheetname in xlsx.sheet_names}\n",
"\n",
"# Load dfs from file\n",
"newdfs = load_sheets(newfile)\n",
"olddfs = load_sheets(oldfile)\n",
|
||
"\n",
|
"# Sheets that exist in only one of the two workbooks\n",
"addedtables = set(newdfs) - set(olddfs)\n",
"losttables = set(olddfs) - set(newdfs)\n",
"if addedtables or losttables:\n",
"    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
|
||
"\n",
|
"def investigate_table(table):\n",
"    \"\"\"Print how sheet `table` differs between the new and old workbooks.\n",
"\n",
"    Reads the module-level `newdfs` and `olddfs` dicts and reports, in order:\n",
"    column headings found in only one version, a row-count mismatch, and each\n",
"    shared column whose values differ. Returns None.\n",
"    \"\"\"\n",
"    newdf = newdfs[table]\n",
"    olddf = olddfs[table]\n",
"    print('\\n\\nTable {} is changed'.format(table))\n",
"\n",
"    # Column headings\n",
"    coldiffs = set(newdf.columns).symmetric_difference(set(olddf.columns))\n",
"    if len(coldiffs) > 0:\n",
"        print('column differences: {}'.format(coldiffs))\n",
"\n",
"    # length\n",
"    if len(newdf) != len(olddf):\n",
"        print('length differences: new {} old {}'.format(len(newdf), len(olddf)))\n",
"\n",
"    # column by column. Only columns present in both versions can be compared,\n",
"    # and only over the rows both versions have: comparing Series of different\n",
"    # lengths raises ValueError, and a new-only column raises KeyError on olddf.\n",
"    sharedrows = min(len(newdf), len(olddf))\n",
"    for column in newdf.columns:\n",
"        if column not in olddf.columns:\n",
"            continue\n",
"        newvalues = newdf[column].iloc[:sharedrows].reset_index(drop=True)\n",
"        oldvalues = olddf[column].iloc[:sharedrows].reset_index(drop=True)\n",
"        if (newvalues != oldvalues).any():\n",
"            print('Differences in column {}'.format(column))\n",
|
||
"\n",
|
"# Compare only sheets that exist in both workbooks: looking up a new-only\n",
"# sheet name in olddfs raises KeyError before any table gets investigated.\n",
"for table in newdfs:\n",
"    if table in olddfs and not newdfs[table].equals(olddfs[table]):\n",
"        investigate_table(table)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 142,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>amitt_id</th>\n",
|
||
" <th>new</th>\n",
|
||
" <th>old</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
"Empty DataFrame\n",
|
||
"Columns: [amitt_id, new, old]\n",
|
||
"Index: []"
|
||
]
|
||
},
|
||
"execution_count": 142,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Look at individual table differences\n",
"table = 'countermeasures'\n",
"column = 'summary'\n",
"newtable, oldtable = newdfs[table], olddfs[table]\n",
"coldiffs = newtable[column] != oldtable[column]\n",
"# One row per changed cell: the row's amitt_id plus the new and old values\n",
"diffcols = pd.DataFrame({\n",
"    'amitt_id': newtable.loc[coldiffs, 'amitt_id'],\n",
"    'new': newtable.loc[coldiffs, column],\n",
"    'old': oldtable.loc[coldiffs, column],\n",
"})\n",
"# Show only the rows whose old value was not blank\n",
"diffcols[diffcols['old'] != '']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.3"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|