-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData.py
54 lines (41 loc) · 1.49 KB
/
Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import random
import pandas as pd
class Point:
    """A single data point: a location plus an optional label.

    Attributes:
        location: the point's location, stored exactly as passed in
            (the object is not copied).
        label: the label attached to the point, or ``None`` when no label
            was supplied.
    """

    def __init__(self, location, label=None):
        """Store ``location`` and ``label`` on the new point."""
        self.location = location
        self.label = label

    def __repr__(self):
        # type(self).__name__ keeps the output accurate for subclasses too.
        return (
            f"{type(self).__name__}"
            f"(location={self.location!r}, label={self.label!r})"
        )
class TestTrainPoint(Point):
    """A ``Point`` that carries an additional ``test_label`` slot.

    Attributes:
        location: inherited from ``Point``.
        label: inherited from ``Point``.
        test_label: second label stored next to ``label``; ``None`` until
            one is assigned.
            NOTE(review): presumably the label produced while evaluating
            a model on this point -- confirm against the callers.
    """

    def __init__(self, location, label=None, test_label=None):
        """Create the point.

        ``label`` defaults to ``None`` to mirror ``Point.__init__``;
        existing calls that pass it positionally or by keyword behave
        exactly as before.
        """
        super().__init__(location, label)
        self.test_label = test_label

    def __repr__(self):
        return (
            f"{type(self).__name__}(location={self.location!r}, "
            f"label={self.label!r}, test_label={self.test_label!r})"
        )
class DataSet:
    """Container for a collection of points with load and split helpers.

    Attributes:
        points: every point held by the data set.
        locations: locations of ``points``; filled by
            ``location_label_split``.
        labels: labels of ``points``; filled by ``location_label_split``.
        test_set: points selected for testing by ``split_dataset``.
        train_set: points selected for training by ``split_dataset``.
    """

    def __init__(self, points=None):
        """Create a data set, optionally seeded with ``points``.

        Args:
            points: optional iterable of point objects (anything exposing
                ``location`` and ``label``). The iterable is copied into
                a new list so that points appended later do not leak into
                the caller's own collection.
        """
        self.points = list(points) if points is not None else []
        self.locations = []
        self.labels = []
        self.test_set = []
        self.train_set = []

    def location_label_split(self):
        """Rebuild ``locations`` and ``labels`` from the current points.

        Both lists are recomputed from scratch, so calling this method
        more than once does not duplicate entries.
        """
        self.locations = [point.location for point in self.points]
        self.labels = [point.label for point in self.points]

    def dataframe_to_dataset(self, file_location, labelColumn):
        """Read a CSV file and append one ``Point`` per row to ``points``.

        Args:
            file_location: path of the CSV file (or anything else
                ``pandas.read_csv`` accepts).
            labelColumn: name of the column that holds the labels. Every
                other column, in file order, contributes one value to
                the point's location list.

        Raises:
            KeyError: if ``labelColumn`` is not a column of the file.
        """
        frame = pd.read_csv(file_location)
        labels = frame[labelColumn].to_numpy()
        # Extract each feature column once as an array and index it by
        # position; a separate ``.loc`` lookup per cell is far slower and
        # depends on the index labels being 0..n-1.
        feature_columns = [
            frame[name].to_numpy()
            for name in frame.columns
            if name != labelColumn
        ]
        for row, label in enumerate(labels):
            location = [column[row] for column in feature_columns]
            self.points.append(Point(location, label))

    def split_dataset(self, split_size):
        """Randomly partition ``points`` into ``test_set`` and ``train_set``.

        Each point is sent to the test set independently with probability
        ``split_size``, so the realised test fraction only approximates
        that value. Test points are wrapped in new ``TestTrainPoint``
        objects (sharing the original location and label); training
        points are the original objects.

        Args:
            split_size: probability that a point goes to the test set.
                Values <= 0 put every point in the training set; values
                >= 1 put every point in the test set.
        """
        # Start from empty partitions so that a repeated call re-splits
        # the data instead of piling duplicates onto the previous split.
        self.test_set = []
        self.train_set = []
        for point in self.points:
            # random.random() lies in [0.0, 1.0); the strict comparison
            # guarantees split_size == 0 never selects a test point.
            if random.random() < split_size:
                self.test_set.append(
                    TestTrainPoint(point.location, point.label)
                )
            else:
                self.train_set.append(point)