-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_extract.py
146 lines (114 loc) · 4.47 KB
/
data_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 10 19:43:41 2019
@author: anthonycamper but mostly \/
@author: Code from ronaldeddings
"""
#GOALS
#List of Protocols & Ports src -> dst
#Delimited into columns
# =============================================================================
# Imports
# =============================================================================
import os
from scapy.all import *
from pathlib import Path
import pandas as pd
import binascii # Binary to Ascii
from progressbar import ProgressBar
import tkinter as tk
from tkinter import filedialog
from tkinter import *
# =============================================================================
# Read in data
# =============================================================================
#file_name = input('enter file name:')
#pcap_file = open("data/" + file_name)
#data_folder = Path("data/")
#pcap_file = data_folder / "wrccdc2012.pcap"
#pcap_file = ("data/wrccdc2012.pcap")
def readfile():
    """Ask the user for a capture file and load it with scapy.

    Opens a Tk file-selection dialog (filter "pcap" first, "all files"
    second), reads the chosen file with ``rdpcap`` and stores the result in
    the module-level ``ourpcap`` so the rest of the script can use it.

    Returns:
        The object produced by ``rdpcap`` (the same one stored in
        ``ourpcap``).

    Raises:
        FileNotFoundError: if the dialog is closed without choosing a file.
    """
    global ourpcap
    root = Tk()
    try:
        pcap_file = filedialog.askopenfilename(
            initialdir="/",
            title="Select file",
            filetypes=(("pcap", "*.pcap"), ("all files", "*.*")),
        )
    finally:
        # Tear the Tk root down as soon as the dialog is done, and do it even
        # on error, so no window is left behind while (or if) parsing fails.
        root.destroy()

    # A dismissed dialog yields an empty selection; fail with a clear message
    # instead of handing an empty path to rdpcap.
    if not pcap_file:
        raise FileNotFoundError("No pcap file selected")

    ourpcap = rdpcap(pcap_file)
    return ourpcap
# =============================================================================
# Protocol Types into Array & Read in pcap file
# =============================================================================
#Could just set it to rootfilename but who needs clean code
#packets = rdpcap(root.filename)
# =============================================================================
# Convert PCAP to DataFrame
# SOURCE: https://github.com/ronaldeddings/Packet-Analytics/blob/master/Packet-Analytics.ipynb
# I am forever in debt to @ronaldeddings for this
# =============================================================================
# Collect field names from IP/TCP/UDP (These will be columns in DF)
# Create blank DataFrame
# Module-level progress bar; extract() wraps its per-packet loop with it.
pbar = ProgressBar()
def extract(ourpcap):
    """Flatten the IP packets of a capture into a pandas DataFrame.

    One row is produced per packet selected by ``ourpcap[IP]``. Columns are,
    in order: every IP header field, ``time``, every TCP header field name,
    then ``payload`` (length), ``payload_raw`` (bytes) and ``payload_hex``
    (hex-encoded bytes). IP and TCP share some field names, so the frame
    contains duplicate column labels.

    The TCP-named columns are filled from whatever layer sits directly on
    top of IP; a field that layer does not carry is recorded as ``None``.
    ``options`` fields are stored as the number of options, not the options
    themselves.

    Args:
        ourpcap: packet list as returned by ``rdpcap``.

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index (empty, but with all
        columns, when the capture holds no IP packets).
    """
    ip_fields = [field.name for field in IP().fields_desc]
    tcp_fields = [field.name for field in TCP().fields_desc]
    dataframe_fields = (
        ip_fields
        + ['time']
        + tcp_fields
        + ['payload', 'payload_raw', 'payload_hex']
    )

    # Collect plain rows and build the frame once at the end; concatenating
    # a one-row frame per packet copies the whole table on every iteration.
    rows = []
    for packet in pbar(ourpcap[IP]):
        ip_layer = packet[IP]

        # IP header fields; 'options' is reduced to a count.
        row = [
            len(ip_layer.fields[name]) if name == 'options'
            else ip_layer.fields[name]
            for name in ip_fields
        ]
        row.append(packet.time)

        # Look up the layer directly above IP by its type, as before.
        layer_type = type(ip_layer.payload)
        try:
            transport = packet[layer_type]
        except IndexError:
            # Lookup found no such layer (e.g. an IP header with nothing
            # above it). Record an empty transport instead of aborting the
            # whole extraction.
            transport = None
        transport_fields = {} if transport is None else transport.fields

        for name in tcp_fields:
            try:
                value = transport_fields[name]
                if name == 'options':
                    value = len(value)
            except (KeyError, TypeError):
                # Field not present on this layer (or not countable).
                value = None
            row.append(value)

        # Payload carried by the transport layer: length, raw bytes, hex.
        if transport is None:
            payload_len, payload_raw = 0, b''
        else:
            payload = transport.payload
            payload_len = len(payload)
            payload_raw = payload.original
        row.extend([payload_len, payload_raw, binascii.hexlify(payload_raw)])

        rows.append(row)

    # Building from a list already yields a clean RangeIndex, so no
    # reset_index()/drop("index") round-trip is needed.
    return pd.DataFrame(rows, columns=dataframe_fields)
def main():
    """Script entry point: pick a capture, announce it, then extract it.

    readfile() leaves the loaded capture in the module-level ``ourpcap``;
    that object is reported on stdout and handed to extract().
    """
    readfile()
    capture = ourpcap
    print(capture.listname, "Opened")
    extract(capture)


main()
#print(df.shape)
# =============================================================================
# Sessions
# =============================================================================
#s = ourpcap.sessions()
#s
#for k,v in s.items():
# v.summary()
# for pkt in v:
# pkt.show()
# =============================================================================
# Read in raw packets
# =============================================================================
#itt = RawPcapReader(pcap_file)
#for (pkt_data, pkt_metadata,) in itt:
# len(pkt_data)
# ether_pkt = Ether(pkt_data)#.fields)
# ether_pkt.summary()
# len(pkt_metadata)
# print(pkt_metadata)
#print(itt)
#len(ourpcap)