AMITT/HTML_GENERATING_CODE/.ipynb_checkpoints/AMITT_code_testbed-checkpoint.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test area for AMITT code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>technique_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>C00220</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>C00221</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>C00222</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>C00223</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>898 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    amitt_id technique_id\n",
       "0     C00006        T0007\n",
       "0     C00006        T0015\n",
       "0     C00006        T0018\n",
       "0     C00006        T0043\n",
       "0     C00006        T0053\n",
       "..       ...          ...\n",
       "135   C00219        T0025\n",
       "136   C00220             \n",
       "137   C00221             \n",
       "138   C00222             \n",
       "139   C00223             \n",
       "\n",
       "[898 rows x 2 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sqlite3 as sql\n",
    "from generate_amitt_ttps import Amitt\n",
    "\n",
    "\n",
    "# Generate AMITT datasets\n",
    "amitt = Amitt()\n",
    "\n",
    "# Check which amitt variables we can see from here\n",
    "print('{}'.format(vars(amitt).keys()))\n",
    "vars(amitt)['cross_counterid_techniqueid']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>technique_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>T0053</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>C00220</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>C00221</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>C00222</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>C00223</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>898 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    amitt_id technique_id\n",
       "0     C00006        T0007\n",
       "0     C00006        T0015\n",
       "0     C00006        T0018\n",
       "0     C00006        T0043\n",
       "0     C00006        T0053\n",
       "..       ...          ...\n",
       "135   C00219        T0025\n",
       "136   C00220             \n",
       "137   C00221             \n",
       "138   C00222             \n",
       "139   C00223             \n",
       "\n",
       "[898 rows x 2 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "amitt.cross_counterid_techniqueid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>tactic_id</th>\n",
       "      <th>name</th>\n",
       "      <th>summary</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>T0001</td>\n",
       "      <td>TA01</td>\n",
       "      <td>5Ds (dismiss, distort, distract, dismay, divide)</td>\n",
       "      <td>Nimmo's \"4Ds of propaganda\": dismiss, distort,...</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>T0002</td>\n",
       "      <td>TA01</td>\n",
       "      <td>Facilitate State Propaganda</td>\n",
       "      <td>Organize citizens around pro-state messaging. ...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>T0003</td>\n",
       "      <td>TA01</td>\n",
       "      <td>Leverage Existing Narratives</td>\n",
       "      <td>Use or adapt existing narrative themes, where ...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>T0004</td>\n",
       "      <td>TA01</td>\n",
       "      <td>Competing Narratives</td>\n",
       "      <td>Advance competing narratives connected to same...</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>T0005</td>\n",
       "      <td>TA02</td>\n",
       "      <td>Center of Gravity Analysis</td>\n",
       "      <td>Recon/research to identify \"the source of powe...</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>T0060</td>\n",
       "      <td>TA11</td>\n",
       "      <td>Continue to amplify</td>\n",
       "      <td>continue narrative or message amplification af...</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>T0061</td>\n",
       "      <td>TA10</td>\n",
       "      <td>Sell merchandising</td>\n",
       "      <td>Sell hats, t-shirts, flags and other branded c...</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>T0062</td>\n",
       "      <td>TA12</td>\n",
       "      <td>Behaviour changes</td>\n",
       "      <td>Monitor and evaluate behaviour changes from mi...</td>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>T0063</td>\n",
       "      <td>TA12</td>\n",
       "      <td>Message reach</td>\n",
       "      <td>Monitor and evaluate message reach in misinfor...</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>T0064</td>\n",
       "      <td>TA12</td>\n",
       "      <td>Social media engagement</td>\n",
       "      <td>Monitor and evaluate social media engagement i...</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   amitt_id tactic_id                                              name  \\\n",
       "0     T0001      TA01  5Ds (dismiss, distort, distract, dismay, divide)   \n",
       "1     T0002      TA01                       Facilitate State Propaganda   \n",
       "2     T0003      TA01                      Leverage Existing Narratives   \n",
       "3     T0004      TA01                              Competing Narratives   \n",
       "4     T0005      TA02                        Center of Gravity Analysis   \n",
       "..      ...       ...                                               ...   \n",
       "59    T0060      TA11                               Continue to amplify   \n",
       "60    T0061      TA10                                Sell merchandising   \n",
       "61    T0062      TA12                                 Behaviour changes   \n",
       "62    T0063      TA12                                     Message reach   \n",
       "63    T0064      TA12                           Social media engagement   \n",
       "\n",
       "                                              summary  id  \n",
       "0   Nimmo's \"4Ds of propaganda\": dismiss, distort,...   1  \n",
       "1   Organize citizens around pro-state messaging. ...   2  \n",
       "2   Use or adapt existing narrative themes, where ...   3  \n",
       "3   Advance competing narratives connected to same...   4  \n",
       "4   Recon/research to identify \"the source of powe...   5  \n",
       "..                                                ...  ..  \n",
       "59  continue narrative or message amplification af...  60  \n",
       "60  Sell hats, t-shirts, flags and other branded c...  61  \n",
       "61  Monitor and evaluate behaviour changes from mi...  62  \n",
       "62  Monitor and evaluate message reach in misinfor...  63  \n",
       "63  Monitor and evaluate social media engagement i...  64  \n",
       "\n",
       "[64 rows x 5 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Generate minimal sqlite database from the Amitt variables\n",
    "conn = sql.connect('amittsite.sqlite')\n",
    "\n",
    "def add_table(dataframe, tablename, columns):    \n",
    "    # Create sql table\n",
    "    colnames = ', '.join(['{} TEXT NOT NULL'.format(col) for col in columns])\n",
    "    conn.execute(\"DROP TABLE IF EXISTS {}\".format(tablename))\n",
    "    conn.execute('''CREATE TABLE {} (id INTEGER PRIMARY KEY AUTOINCREMENT, {});'''.format(tablename, colnames))\n",
    "    #populate table from dataframe\n",
    "    newtable = dataframe[columns].copy().applymap(str)\n",
    "    newtable['id'] = range(1,len(newtable)+1)\n",
    "    newtable.to_sql(tablename, conn, index=False, if_exists='append')\n",
    "    conn.commit()\n",
    "    return newtable\n",
    "\n",
    "#newtable = add_table(amitt.df_actortypes, 'actor_type', ['amitt_id', 'sector_id', 'framework_id', 'name', 'summary'])\n",
    "# counter\n",
    "newtable = add_table(amitt.df_counters, 'counter', ['amitt_id', 'tactic_id', 'metatechnique_id', 'name', 'summary'])\n",
    "# dataset\n",
    "newtable = add_table(amitt.df_frameworks, 'framework', ['amitt_id', 'name', 'summary'])\n",
    "# incident\n",
    "# metatechnique\n",
    "newtable = add_table(amitt.df_metatechniques, 'metatechnique', ['amitt_id', 'name', 'summary'])\n",
    "newtable = add_table(amitt.df_phases, 'phase', ['amitt_id', 'name', 'rank', 'summary'])\n",
    "# playbook\n",
    "# reference\n",
    "# response_type\n",
    "# sector\n",
    "newtable = add_table(amitt.df_tactics, 'tactic', ['amitt_id', 'phase_id', 'name', 'rank', 'summary'])\n",
    "newtable = add_table(amitt.df_tasks, 'task', ['amitt_id', 'tactic_id', 'framework_id', 'name', 'summary'])\n",
    "newtable = add_table(amitt.df_techniques, 'technique', ['amitt_id', 'tactic_id', 'name', 'summary'])\n",
    "# techniques_counters\n",
    "\n",
    "conn.execute(\"DROP TABLE IF EXISTS {}\".format('user'))\n",
    "conn.execute('''CREATE TABLE user (id INTEGER PRIMARY KEY AUTOINCREMENT, username TEXT NOT NULL UNIQUE, password TEXT NOT NULL);''')\n",
    "\n",
    "conn.close()\n",
    "newtable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "df_phases loaded\n",
      "df_techniques loaded\n",
      "df_tasks loaded\n",
      "df_incidents loaded\n",
      "df_counters loaded\n",
      "df_detections loaded\n",
      "df_actors loaded\n",
      "df_resources loaded\n",
      "df_responsetypes loaded\n",
      "df_metatechniques loaded\n",
      "it loaded\n",
      "df_tactics loaded\n",
      "df_techniques_per_tactic loaded\n",
      "df_counters_per_tactic loaded\n",
      "phases not loaded\n",
      "tactics not loaded\n",
      "techniques not loaded\n",
      "counters not loaded\n",
      "metatechniques not loaded\n",
      "actors not loaded\n",
      "resources not loaded\n",
      "num_tactics not loaded\n",
      "cross_counterid_techniqueid loaded\n",
      "cross_counterid_resourceid loaded\n",
      "cross_counterid_actorid loaded\n"
     ]
    }
   ],
   "source": [
    "# Generate full sqlite database from the Amitt variables\n",
    "conn = sql.connect('amitt_sqlite.db')\n",
    "for tablename, table in vars(amitt).items():\n",
    "    if type(table) == pd.core.frame.DataFrame:\n",
    "        table.applymap(str).to_sql(tablename, conn)\n",
    "        print('{} loaded'.format(tablename))\n",
    "    else:\n",
    "        print('{} not loaded'.format(tablename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>actor_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C00006</td>\n",
       "      <td>A033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>A007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C00009</td>\n",
       "      <td>A016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C00009</td>\n",
       "      <td>A006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C00010</td>\n",
       "      <td>A020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>C00220</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>C00221</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>C00222</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>C00223</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>166 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id actor_id\n",
       "0    C00006     A033\n",
       "1    C00008     A007\n",
       "2    C00009     A016\n",
       "2    C00009     A006\n",
       "3    C00010     A020\n",
       "..      ...      ...\n",
       "135  C00219         \n",
       "136  C00220         \n",
       "137  C00221         \n",
       "138  C00222         \n",
       "139  C00223         \n",
       "\n",
       "[166 rows x 2 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "amitt.cross_counterid_actorid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>technique_id</th>\n",
       "      <th>Weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA01</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA06</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>TA08</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>T0006</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C00008</td>\n",
       "      <td>T0009</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>C00216</td>\n",
       "      <td>T0018</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>C00216</td>\n",
       "      <td>T0057</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0024</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0026</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>C00219</td>\n",
       "      <td>T0025</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>717 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         id technique_id  Weight\n",
       "1    C00008         TA01       1\n",
       "1    C00008         TA06       1\n",
       "1    C00008         TA08       1\n",
       "1    C00008        T0006       1\n",
       "1    C00008        T0009       1\n",
       "..      ...          ...     ...\n",
       "134  C00216        T0018       1\n",
       "134  C00216        T0057       1\n",
       "135  C00219        T0024       1\n",
       "135  C00219        T0026       1\n",
       "135  C00219        T0025       1\n",
       "\n",
       "[717 rows x 3 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ct = amitt.cross_counterid_techniqueid\n",
    "ct['Weight'] = 1\n",
    "ct = ct[ct['technique_id'].str.len() > 0]\n",
    "ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])\n",
    "ct"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fix the problem with excelfile changes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-141-d791eafb5aa0>:9: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.\n",
      "  pd.set_option('display.max_colwidth', -1)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import sqlite3 as sql\n",
    "from generate_amitt_ttps import Amitt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "pd.set_option('display.max_rows', 1000)\n",
    "pd.set_option('display.max_colwidth', -1)\n",
    "\n",
    "newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'\n",
    "oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'\n",
    "\n",
    "# Load dfs from file\n",
    "newdfs = {}\n",
    "newxlsx = pd.ExcelFile(newfile)\n",
    "for sheetname in newxlsx.sheet_names:\n",
    "    newdfs[sheetname] = newxlsx.parse(sheetname)\n",
    "    newdfs[sheetname].fillna('', inplace=True)\n",
    "\n",
    "olddfs = {}\n",
    "oldxlsx = pd.ExcelFile(oldfile)\n",
    "for sheetname in oldxlsx.sheet_names:\n",
    "    olddfs[sheetname] = oldxlsx.parse(sheetname)\n",
    "    olddfs[sheetname].fillna('', inplace=True)\n",
    "\n",
    "addedtables = newdfs.keys() - olddfs.keys()\n",
    "losttables = olddfs.keys() - newdfs.keys()\n",
    "if len(addedtables) + len(losttables) > 0:\n",
    "    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))\n",
    "\n",
    "def investigate_table(table):\n",
    "    print('\\n\\nTable {} is changed'.format(table))\n",
    "    # Column headings\n",
    "    coldiffs = set(newdfs[table].columns).symmetric_difference(set(olddfs[table].columns))\n",
    "    if len(coldiffs) > 0:\n",
    "        print('column differences: {}'.format(coldiffs))\n",
    "    # length\n",
    "    if len(newdfs[table]) != len(olddfs[table]):\n",
    "        print('length differences: new {} old {}'.format(len(newdfs[table]), len(olddfs[table])))\n",
    "\n",
    "    # column by column\n",
    "    for column in newdfs[table].columns:\n",
    "        coldiffs = newdfs[table][column] != olddfs[table][column]\n",
    "        if len(newdfs[table][coldiffs]) > 0:\n",
    "            print('Differences in column {}'.format(column))\n",
    "    return\n",
    "\n",
    "for table in newdfs.keys():\n",
    "    if newdfs[table].equals(olddfs[table]) == False:\n",
    "        investigate_table(table)        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>amitt_id</th>\n",
       "      <th>new</th>\n",
       "      <th>old</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [amitt_id, new, old]\n",
       "Index: []"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at individual table differences\n",
    "table = 'countermeasures'\n",
    "column = 'summary'\n",
    "coldiffs = newdfs[table][column] != olddfs[table][column]\n",
    "diffcols = pd.DataFrame()\n",
    "diffcols['amitt_id'] = newdfs[table][coldiffs]['amitt_id']\n",
    "diffcols['new'] = newdfs[table][coldiffs][column]\n",
    "diffcols['old'] = olddfs[table][coldiffs][column]\n",
    "diffcols[diffcols['old'] != '']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}