# Source code for traffic_metrics.traffic_metrics

# Copyright (c) 2017  University of Houston

import os
import json
import time
from collections import OrderedDict
import dpkt
import logging
import warnings
import numpy as np
import pandas as pd
import jsbeautifier
from trafficdatasetmaker.trafficdatasetmaker import TrafficDatasetMaker
from .dashboard import PDUDashBoard, PKTDashBoard
from .trafficmetricstextusess import TrafficMetricsTextUsess
from .tmmetricsutilsusess import TmMetricsUtilsUsess
from .report2table import Report2Table
from .plotter import Selfvalidation, Crossvalidation, Datavalidation

# Silence pandas' SettingWithCopyWarning for the whole run: the metrics code
# slices and mutates DataFrames in ways that trigger it benignly.  The warning
# class moved from pd.core.common to pd.errors between pandas versions, so fall
# back on AttributeError to support both.
try:
    warnings.simplefilter(action="ignore", category=pd.core.common.SettingWithCopyWarning)
except AttributeError:
    warnings.simplefilter(action="ignore", category=pd.errors.SettingWithCopyWarning)

# Module-level logger, named after this module per the logging convention.
logger = logging.getLogger(__name__)

def myconverter(obj):
    """Recursively replace numpy scalar types with native Python types.

    Walks dicts and lists in place, converting ``np.integer`` -> ``int`` and
    ``np.floating`` -> ``float`` so the structure can be serialized with
    ``json.dumps``.  Any other value is returned unchanged.

    Parameters
    ----------
    obj : object
        Arbitrarily nested dict/list structure (or a scalar).

    Returns
    -------
    object
        The same container (mutated in place) or a converted/unchanged scalar.
    """
    if isinstance(obj, dict):  # OrderedDict is a dict subclass, so one check suffices
        for key, val in obj.items():
            obj[key] = myconverter(val)
        return obj
    elif isinstance(obj, list):
        # BUG FIX: the original rebound the loop variable (``val = myconverter(val)``),
        # which never wrote the converted value back into the list, leaving numpy
        # scalars inside lists unconverted.  Assign by index instead.
        for i, val in enumerate(obj):
            obj[i] = myconverter(val)
        return obj
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    else:
        return obj
        

class Traffic_Metrics():
    '''Traffic Metrics analyzes network traffic from a pcap file (or from a
    dataset csv of packets or PDUs derived from a pcap file using the traffic
    dataset maker) to present a JSON report of metrics and a set of charts for
    metrics from the input traffic. The tool also excels at comparing multiple
    packet traces based on statistics of numerous metrics, and charts.'''

    def __init__(self, analysistype, files, outdir, rundashboard=False,
                 filter_str=None, filenames=None, analysis_kind='both'):
        '''
        Parameters
        ----------
        analysistype : str
            the type of analysis to perform, chosen from the set of
            {packets,pdus,both}
        files : list
            a list of strings where each represents the path to the input
            pcap or csv file to analyze
        outdir : str
            output directory to save csv files
        rundashboard : bool
            launch a webserver containing a dashboard for interactive
            visualization of charts (default: False)
        filter_str : str
            a filter string to filter specific packets to analyze
            (default: None)
        filenames : list
            a list of strings where each represents the labels to use for
            each pcap or csv file in the 'files' argument (default: None)
        analysis_kind : str
            perform analysis of the entire input pcap file or per each
            application user-session in the input; chosen from the set
            {per_user_session,both} (default: both)
        '''
        self.analysis_type = analysistype
        self.files = files
        self.out_dir = os.path.abspath(outdir)
        self.rundashboard = rundashboard
        self.filter_str = filter_str
        self.dfs = []                    # loaded DataFrames, one per input file
        self.dfs_names = filenames
        self.dfs_types = []              # 'packets-csv' or 'pdus-csv' per DataFrame
        self.analysis_kind = analysis_kind

    def run(self):
        '''start traffic metrics analysis process'''
        self.get_dfs()
        self.validatedfs()
        logger.debug('length before filter = %s', str([len(i) for i in self.dfs]))
        print(self.analysis_type, self.files, self.out_dir, self.filter_str)
        for i, df in enumerate(self.dfs):
            self.dfs[i] = TmMetricsUtilsUsess().get_filtered_df(df, self.filter_str)
        logger.debug('length after filter = %s', str([len(i) for i in self.dfs]))
        print(self.dfs_types)
        tmtext = TrafficMetricsTextUsess(self.dfs, self.out_dir, self.analysis_type,
                                         self.analysis_kind, self.dfs_types,
                                         dfs_names=self.dfs_names)
        report, report_usess = tmtext.run()
        logger.debug('writing json reports')
        self.write_reports(report, report_usess)
        if self.rundashboard:
            print('making pktdashboard')
            # Only a single-kind analysis can be rendered by a dashboard;
            # 'both' (or anything else) is rejected here.
            if self.analysis_type == 'packets':
                PKTDashBoard(self.dfs).makemydash()
            elif self.analysis_type == 'pdus':
                PDUDashBoard(self.dfs).makemydash()
            else:
                raise ValueError('invalid analysis type')

    def get_dfs(self):
        '''Load every input file into a pandas DataFrame.

        A file that dpkt recognizes as a pcap is first converted to csv
        datasets with TrafficDatasetMaker; anything else is read directly as a
        csv and classified by its columns.  Results are appended to
        ``self.dfs`` / ``self.dfs_types``.

        Raises
        ------
        ValueError
            if a non-pcap file is not a readable csv, or its columns match
            neither the packets nor the PDUs dataset schema.
        '''
        for j, f in enumerate(self.files):
            df = None
            ftype = None
            try:
                # Probe only: constructing the reader raises if this is not a pcap.
                with open(f, 'rb') as fi:
                    dpkt.pcap.Reader(fi)
                ftype = 'pcap'
            except Exception:
                logger.debug("infile (pcapfile) is not a valid pcapfile")
            if ftype == 'pcap':
                # os.path.basename instead of f.split('/')[-1] for portability.
                inp_fname = os.path.basename(f)
                odir = os.path.join(self.out_dir, inp_fname + '_dataset' + str(j))
                TrafficDatasetMaker(f, odir, 'pcap', datasets=['all'],
                                    skip_extra_calculations=True).makecsvs()
                if self.analysis_type == 'packets':
                    df = pd.read_csv(os.path.join(odir, 'all_pkts.csv'), engine='python')
                    df_type = 'packets-csv'
                else:
                    df = pd.read_csv(os.path.join(odir, 'all_pdus.csv'), engine='python')
                    df_type = 'pdus-csv'
            else:
                logger.debug('%s is not pcap - trying to open as csv', f)
                try:
                    df = pd.read_csv(f, engine='python')
                except Exception as err:
                    raise ValueError(f'file: "{f}" is not a valid pcap or csv file') from err
                # Classify by schema marker columns.  Done outside the try so
                # the "unknown format" error is no longer swallowed by a bare
                # except and replaced with the generic message (original bug).
                if 'l2_d_size' in df:
                    df_type = 'packets-csv'
                elif 'global_pdu_idx' in df:
                    df_type = 'pdus-csv'
                else:
                    raise ValueError(f'file: "{f}" has unknown csv file format')
            logger.debug('%s dataset type after dataset extraction is %s', f, df_type)
            if df is not None:
                self.dfs.append(df)
                self.dfs_types.append(df_type)

    def validatedfs(self):
        '''Check that the loaded DataFrames have the columns the requested
        analysis type and analysis kind need.

        Raises
        ------
        ValueError
            if a required marker column is missing from any DataFrame.
        '''
        for df in self.dfs:
            if self.analysis_type in ['packets', 'both'] and 'l2_d_size' not in df:
                raise ValueError('Invalid csv file: not valid for pkts analysis')
            if self.analysis_kind in ['per_user_session', 'both'] and 'user_sess_idx' not in df:
                raise ValueError('Invalid input file: user session index column is required')

    def write_reports(self, report, report_usess):
        '''Serialize the metric reports as beautified JSON files in ``out_dir``
        (plus a csv table for the main report).

        Bulky chart-data keys are stripped first, and numpy scalars are
        converted so ``json.dumps`` succeeds.
        '''
        opts = jsbeautifier.default_options()
        opts.indent_size = 2
        if report:
            reporti = self.remove_key_vals(['x_vals', 'y_vals', 'per_usess_xys'], report)
            res1 = jsbeautifier.beautify(json.dumps(myconverter(reporti)), opts)
            with open(self.out_dir + '/tm_report.json', 'w') as repfile:
                repfile.write(res1)
            Report2Table().convert(self.out_dir + '/tm_report.json',
                                   self.out_dir + '/tm_report.csv')
        if report_usess:
            report_usessi = self.remove_key_vals(['x_vals', 'y_vals', 'per_usess_xys'],
                                                 report_usess)
            res2 = jsbeautifier.beautify(json.dumps(myconverter(report_usessi)), opts)
            with open(self.out_dir + '/tm_usess_report.json', 'w') as repfile:
                repfile.write(res2)

    def remove_key_vals(self, keys, var):
        '''Return a copy of ``var`` with every dict entry whose key is in
        ``keys`` removed, recursing through nested dicts and lists.

        Parameters
        ----------
        keys : list
            key names to drop wherever they occur
        var : object
            arbitrarily nested dict/list structure (or a scalar)
        '''
        if isinstance(var, dict):
            ndict = OrderedDict()
            for k, v in var.items():
                if k not in keys:
                    ndict[k] = self.remove_key_vals(keys, v)
            return ndict
        elif isinstance(var, list):
            return [self.remove_key_vals(keys, v) for v in var]
        else:
            return var