AMITT/HTML_GENERATING_CODE/.ipynb_checkpoints/AMITT_code_testbed-checkpoint.ipynb
2021-06-28 17:27:36 +01:00

612 строки
17 KiB
Plaintext
Исходник Ответственный История

Этот файл содержит неоднозначные символы Юникода

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test area for AMITT code"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>technique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0043</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>T0053</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0025</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>C00220</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>C00221</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>C00222</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>C00223</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>898 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" amitt_id technique_id\n",
"0 C00006 T0007\n",
"0 C00006 T0015\n",
"0 C00006 T0018\n",
"0 C00006 T0043\n",
"0 C00006 T0053\n",
".. ... ...\n",
"135 C00219 T0025\n",
"136 C00220 \n",
"137 C00221 \n",
"138 C00222 \n",
"139 C00223 \n",
"\n",
"[898 rows x 2 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import sqlite3 as sql\n",
"from generate_amitt_ttps import Amitt\n",
"\n",
"\n",
"# Build the AMITT object; its attributes hold the generated datasets\n",
"amitt = Amitt()\n",
"\n",
"# List the attribute names exposed by the object, then show the\n",
"# counter-to-technique cross table as the cell result\n",
"amitt_attrs = vars(amitt)\n",
"print('{}'.format(amitt_attrs.keys()))\n",
"amitt_attrs['cross_counterid_techniqueid']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df_phases loaded\n",
"df_techniques loaded\n",
"df_tasks loaded\n",
"df_incidents loaded\n",
"df_counters loaded\n",
"df_detections loaded\n",
"df_actors loaded\n",
"df_resources loaded\n",
"df_responsetypes loaded\n",
"df_metatechniques loaded\n",
"it loaded\n",
"df_tactics loaded\n",
"df_techniques_per_tactic loaded\n",
"df_counters_per_tactic loaded\n",
"phases not loaded\n",
"tactics not loaded\n",
"techniques not loaded\n",
"counters not loaded\n",
"metatechniques not loaded\n",
"actors not loaded\n",
"resources not loaded\n",
"num_tactics not loaded\n",
"cross_counterid_techniqueid loaded\n",
"cross_counterid_resourceid loaded\n",
"cross_counterid_actorid loaded\n"
]
}
],
"source": [
"# Generate full sqlite database from the Amitt variables.\n",
"# Only DataFrame attributes become tables; everything else is reported as not loaded.\n",
"conn = sql.connect('amitt_sqlite.db')\n",
"try:\n",
"    for tablename, table in vars(amitt).items():\n",
"        if isinstance(table, pd.DataFrame):\n",
"            # Every cell is stringified before writing. if_exists='replace' lets this\n",
"            # cell be re-run against an existing amitt_sqlite.db; the to_sql default\n",
"            # ('fail') raises ValueError as soon as a table is already present.\n",
"            table.applymap(str).to_sql(tablename, conn, if_exists='replace')\n",
"            print('{} loaded'.format(tablename))\n",
"        else:\n",
"            print('{} not loaded'.format(tablename))\n",
"finally:\n",
"    # Release the database file handle even if a table fails to write\n",
"    conn.close()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>actor_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>C00006</td>\n",
" <td>A033</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>A007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C00009</td>\n",
" <td>A016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C00009</td>\n",
" <td>A006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>C00010</td>\n",
" <td>A020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>C00220</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>C00221</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>C00222</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>C00223</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>166 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id actor_id\n",
"0 C00006 A033\n",
"1 C00008 A007\n",
"2 C00009 A016\n",
"2 C00009 A006\n",
"3 C00010 A020\n",
".. ... ...\n",
"135 C00219 \n",
"136 C00220 \n",
"137 C00221 \n",
"138 C00222 \n",
"139 C00223 \n",
"\n",
"[166 rows x 2 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counter-to-actor cross table. This cell last ran when the attribute was called\n",
"# cross_counterid_actorid, but the most recent vars(amitt) listing in this notebook\n",
"# shows cross_counterid_actortypeid instead, so use whichever name the object has.\n",
"# NOTE(review): confirm the current attribute name in generate_amitt_ttps.\n",
"actor_table_name = ('cross_counterid_actortypeid'\n",
"                    if hasattr(amitt, 'cross_counterid_actortypeid')\n",
"                    else 'cross_counterid_actorid')\n",
"getattr(amitt, actor_table_name)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>technique_id</th>\n",
" <th>Weight</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA06</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>TA08</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>T0006</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>C00008</td>\n",
" <td>T0009</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>C00216</td>\n",
" <td>T0018</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>C00216</td>\n",
" <td>T0057</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0024</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0026</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>C00219</td>\n",
" <td>T0025</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>717 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" id technique_id Weight\n",
"1 C00008 TA01 1\n",
"1 C00008 TA06 1\n",
"1 C00008 TA08 1\n",
"1 C00008 T0006 1\n",
"1 C00008 T0009 1\n",
".. ... ... ...\n",
"134 C00216 T0018 1\n",
"134 C00216 T0057 1\n",
"135 C00219 T0024 1\n",
"135 C00219 T0026 1\n",
"135 C00219 T0025 1\n",
"\n",
"[717 rows x 3 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Export the counter -> technique pairs as a weighted edge list (Source, Target, Weight).\n",
"# Rows with an empty technique_id are dropped. Weight is added with assign() on the\n",
"# filtered result, so the frame held by `amitt` is not given an extra column as a\n",
"# side effect of running this cell.\n",
"ct = amitt.cross_counterid_techniqueid\n",
"ct = ct[ct['technique_id'].str.len() > 0].assign(Weight=1)\n",
"ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
"ct"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Fix the problem with Excel file changes\n",
"\n",
"Background: AMITT's master dataset is in an Excel file. Changes to it don't show up in GitHub diffs, so it's difficult to tell what has changed between versions. The code below checks for those differences; run it repeatedly until the versions align."
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
" pd.set_option('display.max_colwidth', -1)\n"
]
}
],
"source": [
"import pandas as pd\n",
"import sqlite3 as sql\n",
"from generate_amitt_ttps import Amitt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"# Show up to 1000 rows and do not truncate cell text when frames are displayed\n",
"pd.set_option('display.max_rows', 1000)\n",
"# None means no column-width limit; passing -1 is deprecated since pandas 1.0\n",
"pd.set_option('display.max_colwidth', None)\n",
"\n",
"newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
"oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
"\n",
"def _load_sheets(path):\n",
"    \"\"\"Read every sheet of the workbook at `path`.\n",
"\n",
"    Returns a dict mapping sheet name to DataFrame, with missing cells\n",
"    replaced by the empty string.\n",
"    \"\"\"\n",
"    # The context manager closes the workbook once all sheets are parsed\n",
"    with pd.ExcelFile(path) as workbook:\n",
"        return {name: workbook.parse(name).fillna('') for name in workbook.sheet_names}\n",
"\n",
"# Load dfs from file\n",
"newdfs = _load_sheets(newfile)\n",
"olddfs = _load_sheets(oldfile)\n",
"\n",
"# Report sheets that exist in only one of the two workbooks\n",
"addedtables = newdfs.keys() - olddfs.keys()\n",
"losttables = olddfs.keys() - newdfs.keys()\n",
"if addedtables or losttables:\n",
"    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
"\n",
"def investigate_table(table):\n",
"    \"\"\"Print how sheet `table` differs between the new and old workbooks.\n",
"\n",
"    Reads the module-level dicts `newdfs` and `olddfs`. Reports columns that\n",
"    are present in only one version, a row-count mismatch, and each shared\n",
"    column whose values differ. Prints only; returns None.\n",
"    \"\"\"\n",
"    newdf = newdfs[table]\n",
"    olddf = olddfs[table]\n",
"    print('\\n\\nTable {} is changed'.format(table))\n",
"    # Column headings\n",
"    coldiffs = set(newdf.columns).symmetric_difference(set(olddf.columns))\n",
"    if len(coldiffs) > 0:\n",
"        print('column differences: {}'.format(coldiffs))\n",
"    # length\n",
"    if len(newdf) != len(olddf):\n",
"        print('length differences: new {} old {}'.format(len(newdf), len(olddf)))\n",
"\n",
"    # column by column\n",
"    # Only columns present in both versions are compared (the others were reported\n",
"    # above), and only over the rows both versions have: indexing a missing column\n",
"    # raises KeyError and comparing Series of different lengths raises ValueError.\n",
"    nrows = min(len(newdf), len(olddf))\n",
"    for column in newdf.columns:\n",
"        if column not in olddf.columns:\n",
"            continue\n",
"        newvalues = newdf[column].iloc[:nrows].reset_index(drop=True)\n",
"        oldvalues = olddf[column].iloc[:nrows].reset_index(drop=True)\n",
"        if (newvalues != oldvalues).any():\n",
"            print('Differences in column {}'.format(column))\n",
"\n",
"# Investigate every sheet that exists in both workbooks and is not identical.\n",
"# A sheet found only in the new workbook has nothing to compare against, so it is\n",
"# skipped here instead of failing with KeyError on olddfs[table].\n",
"for table in newdfs:\n",
"    if table in olddfs and not newdfs[table].equals(olddfs[table]):\n",
"        investigate_table(table)"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amitt_id</th>\n",
" <th>new</th>\n",
" <th>old</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [amitt_id, new, old]\n",
"Index: []"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows where one column of one sheet differs between the two workbooks\n",
"table = 'countermeasures'\n",
"column = 'summary'\n",
"newtable = newdfs[table]\n",
"oldtable = olddfs[table]\n",
"coldiffs = newtable[column] != oldtable[column]\n",
"diffcols = pd.DataFrame({\n",
"    'amitt_id': newtable.loc[coldiffs, 'amitt_id'],\n",
"    'new': newtable.loc[coldiffs, column],\n",
"    'old': oldtable.loc[coldiffs, column],\n",
"})\n",
"# Keep only the rows whose old value is not the empty string\n",
"diffcols[diffcols['old'] != '']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}