# Test area for AMITT code

In [2]:
import pandas as pd
import sqlite3 as sql
from generate_amitt_ttps import Amitt


# Build the AMITT datasets by instantiating the project's Amitt class
amitt = Amitt()

# Show the attribute names the Amitt instance exposes, then display one of
# them (the counter/technique cross-table) by looking it up by name
amitt_attributes = vars(amitt)
print(amitt_attributes.keys())
amitt_attributes['cross_counterid_techniqueid']

dict_keys(['df_phases', 'df_frameworks', 'df_techniques', 'df_tasks', 'df_incidents', 'df_counters', 'df_detections', 'df_actortypes', 'df_resources', 'df_responsetypes', 'df_metatechniques', 'it', 'df_tactics', 'df_techniques_per_tactic', 'df_counters_per_tactic', 'phases', 'tactics', 'techniques', 'counters', 'metatechniques', 'actortypes', 'resources', 'num_tactics', 'cross_counterid_techniqueid', 'cross_counterid_resourceid', 'cross_counterid_actortypeid'])


Unnamed: 0,amitt_id,technique_id
0,C00006,T0007
0,C00006,T0015
0,C00006,T0018
0,C00006,T0043
0,C00006,T0053
...,...,...
135,C00219,T0025
136,C00220,
137,C00221,
138,C00222,


In [3]:
# Display the counter/technique cross-table, this time through plain attribute access
amitt.cross_counterid_techniqueid

Unnamed: 0,amitt_id,technique_id
0,C00006,T0007
0,C00006,T0015
0,C00006,T0018
0,C00006,T0043
0,C00006,T0053
...,...,...
135,C00219,T0025
136,C00220,
137,C00221,
138,C00222,


In [10]:
# Generate minimal sqlite database from the Amitt variables
# `conn` is the module-level connection that add_table and the statements below write through
conn = sql.connect('amittsite.sqlite')

def add_table(dataframe, tablename, columns, connection=None):
    """Recreate a sqlite table and fill it from selected dataframe columns.

    Drops `tablename` if it exists, creates it again with an autoincrementing
    integer `id` primary key plus one `TEXT NOT NULL` column per entry in
    `columns`, appends the dataframe rows (every value converted with `str`)
    with ids 1..len(dataframe), and commits.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Source data; must contain every name in `columns`. It is not modified.
    tablename : str
        Name of the sqlite table to (re)create.
    columns : iterable of str
        Dataframe columns to export, in table column order.
    connection : sqlite3.Connection, optional
        Connection to write to. When omitted, the notebook-level `conn` is used.

    Returns
    -------
    pandas.DataFrame
        The stringified rows that were written, including the generated `id` column.

    Notes
    -----
    `tablename` and `columns` are interpolated directly into the SQL text, so
    they must be trusted, valid SQL identifiers.
    """
    db = conn if connection is None else connection
    columns = list(columns)

    # Create sql table
    column_defs = ', '.join('{} TEXT NOT NULL'.format(col) for col in columns)
    db.execute("DROP TABLE IF EXISTS {}".format(tablename))
    db.execute('''CREATE TABLE {} (id INTEGER PRIMARY KEY AUTOINCREMENT, {});'''.format(tablename, column_defs))

    # Populate table from dataframe. DataFrame.applymap is deprecated in newer
    # pandas in favour of DataFrame.map, so prefer map when this pandas has it.
    selected = dataframe[columns]
    to_text = selected.map if hasattr(pd.DataFrame, 'map') else selected.applymap
    newtable = to_text(str)
    newtable['id'] = range(1, len(newtable) + 1)
    newtable.to_sql(tablename, db, index=False, if_exists='append')
    db.commit()
    return newtable

# Export the AMITT dataframes into the site database. Everything runs inside
# try/finally so the connection is released even when one of the exports raises
# (for example when a dataframe lacks one of the requested columns).
# The bare "# name" comments mark tables that are not exported by this cell.
try:
    # actor_type export is currently disabled
    #newtable = add_table(amitt.df_actortypes, 'actor_type', ['amitt_id', 'sector_id', 'framework_id', 'name', 'summary'])
    # counter
    newtable = add_table(amitt.df_counters, 'counter', ['amitt_id', 'tactic_id', 'metatechnique_id', 'name', 'summary'])
    # dataset
    newtable = add_table(amitt.df_frameworks, 'framework', ['amitt_id', 'name', 'summary'])
    # incident
    # metatechnique
    newtable = add_table(amitt.df_metatechniques, 'metatechnique', ['amitt_id', 'name', 'summary'])
    newtable = add_table(amitt.df_phases, 'phase', ['amitt_id', 'name', 'rank', 'summary'])
    # playbook
    # reference
    # response_type
    # sector
    newtable = add_table(amitt.df_tactics, 'tactic', ['amitt_id', 'phase_id', 'name', 'rank', 'summary'])
    newtable = add_table(amitt.df_tasks, 'task', ['amitt_id', 'tactic_id', 'framework_id', 'name', 'summary'])
    newtable = add_table(amitt.df_techniques, 'technique', ['amitt_id', 'tactic_id', 'name', 'summary'])
    # techniques_counters

    # Recreate an empty user table (no rows are inserted here)
    conn.execute("DROP TABLE IF EXISTS {}".format('user'))
    conn.execute('''CREATE TABLE user (id INTEGER PRIMARY KEY AUTOINCREMENT, username TEXT NOT NULL UNIQUE, password TEXT NOT NULL);''')
    # Make sure the user table is persisted whatever the connection's transaction state
    conn.commit()
finally:
    conn.close()

# Display the last table written (technique)
newtable

Unnamed: 0,amitt_id,tactic_id,name,summary,id
0,T0001,TA01,"5Ds (dismiss, distort, distract, dismay, divide)","Nimmo's ""4Ds of propaganda"": dismiss, distort,...",1
1,T0002,TA01,Facilitate State Propaganda,Organize citizens around pro-state messaging. ...,2
2,T0003,TA01,Leverage Existing Narratives,"Use or adapt existing narrative themes, where ...",3
3,T0004,TA01,Competing Narratives,Advance competing narratives connected to same...,4
4,T0005,TA02,Center of Gravity Analysis,"Recon/research to identify ""the source of powe...",5
...,...,...,...,...,...
59,T0060,TA11,Continue to amplify,continue narrative or message amplification af...,60
60,T0061,TA10,Sell merchandising,"Sell hats, t-shirts, flags and other branded c...",61
61,T0062,TA12,Behaviour changes,Monitor and evaluate behaviour changes from mi...,62
62,T0063,TA12,Message reach,Monitor and evaluate message reach in misinfor...,63


In [26]:
# Generate full sqlite database from the Amitt variables:
# every attribute of `amitt` that is a DataFrame becomes a table named after the attribute.
conn = sql.connect('amitt_sqlite.db')
try:
    for tablename, table in vars(amitt).items():
        if isinstance(table, pd.DataFrame):
            # DataFrame.applymap is deprecated in newer pandas in favour of DataFrame.map
            to_text = table.map if hasattr(pd.DataFrame, 'map') else table.applymap
            # if_exists='replace' keeps the cell re-runnable; the default ('fail')
            # raises as soon as the table already exists in the database file
            to_text(str).to_sql(tablename, conn, if_exists='replace')
            print('{} loaded'.format(tablename))
        else:
            print('{} not loaded'.format(tablename))
finally:
    # Release the database file even if one of the exports fails
    conn.close()

df_phases loaded
df_techniques loaded
df_tasks loaded
df_incidents loaded
df_counters loaded
df_detections loaded
df_actors loaded
df_resources loaded
df_responsetypes loaded
df_metatechniques loaded
it loaded
df_tactics loaded
df_techniques_per_tactic loaded
df_counters_per_tactic loaded
phases not loaded
tactics not loaded
techniques not loaded
counters not loaded
metatechniques not loaded
actors not loaded
resources not loaded
num_tactics not loaded
cross_counterid_techniqueid loaded
cross_counterid_resourceid loaded
cross_counterid_actorid loaded


In [41]:
# The counter/actor cross-table is exposed as 'cross_counterid_actortypeid' by
# Amitt versions that use "actortype" naming and as 'cross_counterid_actorid'
# by older ones. Use whichever this instance provides, so the cell does not
# raise AttributeError on a fresh run.
counter_actor_attr = ('cross_counterid_actortypeid'
                      if hasattr(amitt, 'cross_counterid_actortypeid')
                      else 'cross_counterid_actorid')
getattr(amitt, counter_actor_attr)

Unnamed: 0,id,actor_id
0,C00006,A033
1,C00008,A007
2,C00009,A016
2,C00009,A006
3,C00010,A020
...,...,...
135,C00219,
136,C00220,
137,C00221,
138,C00222,


In [3]:
# Export the counter/technique cross-table as a Source,Target,Weight CSV.
# assign() returns a new frame, so the 'Weight' column is NOT added to
# amitt.cross_counterid_techniqueid itself and the cell can be re-run safely.
ct = amitt.cross_counterid_techniqueid.assign(Weight=1)
# Keep only rows whose technique_id is a non-empty string
ct = ct[ct['technique_id'].str.len() > 0]
ct.to_csv('../visualisations/cross_counterid_techniqueid.csv', index=False, header=['Source','Target', 'Weight'])
ct

Unnamed: 0,id,technique_id,Weight
1,C00008,TA01,1
1,C00008,TA06,1
1,C00008,TA08,1
1,C00008,T0006,1
1,C00008,T0009,1
...,...,...,...
134,C00216,T0018,1
134,C00216,T0057,1
135,C00219,T0024,1
135,C00219,T0026,1


# Fix the problem with Excel file changes

In [141]:
import pandas as pd
import sqlite3 as sql
from generate_amitt_ttps import Amitt
import pandas as pd
import numpy as np
import os
from sklearn.feature_extraction.text import CountVectorizer
# Display options: show up to 1000 rows and never truncate cell text.
pd.set_option('display.max_rows', 1000)
# None means "no limit"; the older spelling -1 is deprecated and triggers a warning.
pd.set_option('display.max_colwidth', None)

newfile = '../AMITT_MASTER_DATA/AMITT_TTPs_MASTER.xlsx'
oldfile = 'AMITT_TTPs_MASTER_github_version.xlsx'


def load_sheets(xlsx):
    """Parse every sheet of an open pandas.ExcelFile.

    Returns a dict mapping sheet name to DataFrame, with missing cells
    replaced by the empty string.
    """
    return {sheetname: xlsx.parse(sheetname).fillna('') for sheetname in xlsx.sheet_names}


# Load dfs from file
newxlsx = pd.ExcelFile(newfile)
newdfs = load_sheets(newxlsx)

oldxlsx = pd.ExcelFile(oldfile)
olddfs = load_sheets(oldxlsx)

# Report sheets that exist in only one of the two workbooks
addedtables = newdfs.keys() - olddfs.keys()
losttables = olddfs.keys() - newdfs.keys()
if addedtables or losttables:
    print('Table changes: new tables are {}, lost tables are {}'.format(addedtables, losttables))

def investigate_table(table, new_tables=None, old_tables=None):
    """Print a summary of how one table differs between the new and old data.

    Reports, in order: columns present in only one version, a row-count
    mismatch, and each shared column whose values differ.

    Columns that exist only in the new table are skipped in the value
    comparison (they are already listed under "column differences"), and when
    the row counts differ only the overlapping leading rows are compared
    positionally. Without this, the comparison would raise right after
    reporting exactly those differences.

    Parameters
    ----------
    table : str
        Key of the table to compare; must exist in both dicts.
    new_tables, old_tables : dict of str -> pandas.DataFrame, optional
        Tables to compare. Default to the notebook-level `newdfs` / `olddfs`.

    Returns
    -------
    None
    """
    print('\n\nTable {} is changed'.format(table))
    new = (newdfs if new_tables is None else new_tables)[table]
    old = (olddfs if old_tables is None else old_tables)[table]

    # Column headings
    coldiffs = set(new.columns).symmetric_difference(set(old.columns))
    if len(coldiffs) > 0:
        print('column differences: {}'.format(coldiffs))
    # length
    if len(new) != len(old):
        print('length differences: new {} old {}'.format(len(new), len(old)))

    # column by column, restricted to what both versions actually share
    overlap = min(len(new), len(old))
    for column in new.columns:
        if column not in old.columns:
            continue
        # Reset the index so the two slices are identically labelled
        new_values = new[column].iloc[:overlap].reset_index(drop=True)
        old_values = old[column].iloc[:overlap].reset_index(drop=True)
        if (new_values != old_values).any():
            print('Differences in column {}'.format(column))
    return

# Investigate every table whose contents changed. A table that exists only in
# the new workbook has nothing to compare against (it is reported by the
# "Table changes" message above), so skip it rather than raise KeyError.
for table in newdfs.keys():
    if table in olddfs and not newdfs[table].equals(olddfs[table]):
        investigate_table(table)

  pd.set_option('display.max_colwidth', -1)


In [142]:
# Inspect one column of one table: which rows changed between the two workbooks
table = 'countermeasures'
column = 'summary'
coldiffs = newdfs[table][column] != olddfs[table][column]
# Side-by-side view of the changed rows: AMITT id, new value, old value
diffcols = pd.DataFrame({
    'amitt_id': newdfs[table].loc[coldiffs, 'amitt_id'],
    'new': newdfs[table].loc[coldiffs, column],
    'old': olddfs[table].loc[coldiffs, column],
})
# Show only the changes where the old value was not empty
diffcols.loc[diffcols['old'] != '']

Unnamed: 0,amitt_id,new,old
