AMITT/HTML_GENERATING_CODE/.ipynb_checkpoints/AMITT_code_testbed-checkpoint.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test area for AMITT code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>technique_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>C00220</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>C00221</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>C00222</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>C00223</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>898 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    amitt_id technique_id\n",
       "0     C00006        T0007\n",
       "0     C00006        T0015\n",
       "0     C00006        T0018\n",
       "0     C00006        T0043\n",
       "0     C00006        T0053\n",
       "..       ...          ...\n",
       "135   C00219        T0025\n",
       "136   C00220             \n",
       "137   C00221             \n",
       "138   C00222             \n",
       "139   C00223             \n",
       "\n",
       "[898 rows x 2 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sqlite3 as sql\n",
    "from generate_amitt_ttps import Amitt\n",
    "\n",
    "\n",
    "# Generate AMITT datasets\n",
    "amitt = Amitt()\n",
    "\n",
    "# Check which amitt variables we can see from here\n",
    "print('{}'.format(vars(amitt).keys()))\n",
    "vars(amitt)['cross_counterid_techniqueid']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_phases loaded\n",
      "df_techniques loaded\n",
      "df_tasks loaded\n",
      "df_incidents loaded\n",
      "df_counters loaded\n",
      "df_detections loaded\n",
      "df_actors loaded\n",
      "df_resources loaded\n",
      "df_responsetypes loaded\n",
      "df_metatechniques loaded\n",
      "it loaded\n",
      "df_tactics loaded\n",
      "df_techniques_per_tactic loaded\n",
      "df_counters_per_tactic loaded\n",
      "phases not loaded\n",
      "tactics not loaded\n",
      "techniques not loaded\n",
      "counters not loaded\n",
      "metatechniques not loaded\n",
      "actors not loaded\n",
      "resources not loaded\n",
      "num_tactics not loaded\n",
      "cross_counterid_techniqueid loaded\n",
      "cross_counterid_resourceid loaded\n",
      "cross_counterid_actorid loaded\n"
     ]
    }
   ],
   "source": [
    "# Generate full sqlite database from the Amitt variables\n",
    "conn = sql.connect('amitt_sqlite.db')\n",
    "for tablename, table in vars(amitt).items():\n",
    "    if type(table) == pd.core.frame.DataFrame:\n",
    "        table.applymap(str).to_sql(tablename, conn)\n",
    "        print('{} loaded'.format(tablename))\n",
    "    else:\n",
    "        print('{} not loaded'.format(tablename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>actor_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>A033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>A007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C00009</td>\n",
       "      <td>A016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C00009</td>\n",
       "      <td>A006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C00010</td>\n",
       "      <td>A020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>C00220</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>C00221</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>C00222</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>C00223</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>166 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id actor_id\n",
       "0    C00006     A033\n",
       "1    C00008     A007\n",
       "2    C00009     A016\n",
       "2    C00009     A006\n",
       "3    C00010     A020\n",
       "..      ...      ...\n",
       "135  C00219         \n",
       "136  C00220         \n",
       "137  C00221         \n",
       "138  C00222         \n",
       "139  C00223         \n",
       "\n",
       "[166 rows x 2 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "amitt.cross_counterid_actorid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>technique_id</th>\n",
       "      <th>Weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA01</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA06</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA08</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>T0006</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>T0009</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>C00216</td>\n",
       "      <td>T0018</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>C00216</td>\n",
       "      <td>T0057</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0024</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0026</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0025</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>717 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id technique_id  Weight\n",
       "1    C00008         TA01       1\n",
       "1    C00008         TA06       1\n",
       "1    C00008         TA08       1\n",
       "1    C00008        T0006       1\n",
       "1    C00008        T0009       1\n",
       "..      ...          ...     ...\n",
       "134  C00216        T0018       1\n",
       "134  C00216        T0057       1\n",
       "135  C00219        T0024       1\n",
       "135  C00219        T0026       1\n",
       "135  C00219        T0025       1\n",
       "\n",
       "[717 rows x 3 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ct = amitt.cross_counterid_techniqueid\n",
    "ct['Weight'] = 1\n",
    "ct = ct[ct['technique_id'].str.len() > 0]\n",
    "ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
    "ct"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fix the problem with excelfile changes\n",
    "\n",
    "Background: AMITT's master dataset is in an excelfile. Changes in this dont' show up in github, so it's difficult to tell what's changed between versions.  Code below checks for those differences - use this repeatedly until versions align. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
      "  pd.set_option('display.max_colwidth', -1)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sqlite3 as sql\n",
    "from generate_amitt_ttps import Amitt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "pd.set_option('display.max_rows', 1000)\n",
    "pd.set_option('display.max_colwidth', -1)\n",
    "\n",
    "newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
    "oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
    "\n",
    "# Load dfs from file\n",
    "newdfs = {}\n",
    "newxlsx = pd.ExcelFile(newfile)\n",
    "for sheetname in newxlsx.sheet_names:\n",
    "    newdfs[sheetname] = newxlsx.parse(sheetname)\n",
    "    newdfs[sheetname].fillna('', inplace=True)\n",
    "\n",
    "olddfs = {}\n",
    "oldxlsx = pd.ExcelFile(oldfile)\n",
    "for sheetname in oldxlsx.sheet_names:\n",
    "    olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
    "    olddfs[sheetname].fillna('', inplace=True)\n",
    "\n",
    "addedtables = newdfs.keys() - olddfs.keys()\n",
    "losttables = olddfs.keys() - newdfs.keys()\n",
    "if len(addedtables) + len(losttables) > 0:\n",
    "    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
    "\n",
    "def investigate_table(table):\n",
    "    print('\\n\\nTable {} is changed'.format(table))\n",
    "    # Column headings\n",
    "    coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
    "    if len(coldiffs) > 0:\n",
    "        print('column differences: {}'.format(coldiffs))\n",
    "    # length\n",
    "    if len(newdfs[table]) != len(olddfs[table]):\n",
    "        print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
    "\n",
    "    # column by column\n",
    "    for column in newdfs[table].columns:\n",
    "        coldiffs = newdfs[table][column] != olddfs[table][column]\n",
    "        if len(newdfs[table][coldiffs]) > 0:\n",
    "            print('Differences in column {}'.format(column))\n",
    "    return\n",
    "\n",
    "for table in newdfs.keys():\n",
    "    if newdfs[table].equals(olddfs[table]) == False:\n",
    "        investigate_table(table)        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>new</th>\n",
       "      <th>old</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [amitt_id, new, old]\n",
       "Index: []"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at individual table differences\n",
    "table = 'countermeasures'\n",
    "column = 'summary'\n",
    "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
    "diffcols = pd.DataFrame()\n",
    "diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
    "diffcols['new'] = newdfs[table][coldiffs][column]\n",
    "diffcols['old'] = olddfs[table][coldiffs][column]\n",
    "diffcols[diffcols['old'] != '']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}