-
Notifications
You must be signed in to change notification settings - Fork 1
/
DataSet.py
35 lines (31 loc) · 1.39 KB
/
DataSet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class DataSet:
def __init__(self, examples=None, inputs=None, attributes=None, target=None, attrnames=None, values=None):
self.examples = examples
self.target = target
self.values = values
self.inputs = inputs
self.attributes = attributes
self.attrnames = attrnames or attributes
# examples : is the list of all examples of the dataset
# target : is the position of the attribute that indicates the classification of the example
# values : contains the values of the classification
# inputs : attributes except the classification
# attributes : all the attributes
# attrnames : name of the attributes in the dataset
#parsing del file .txt e ritorna un DataSet
def setDataSet(file, attrnames, target, values):
examples = []
for line in open(file).readlines():
content = line.split(',')
content = [c.rstrip() for c in content]
example = []
for j in range(0, len(content)):
example.append(content[j])
if '?' not in example: # non considero le righe del dataset con attributi mancanti (in MammographicMasses)
examples.append(example)
attributes = []
for i in range(0, len(example)):
attributes.append(i)
inputs = list(attributes)
inputs.pop(inputs.index(target))
return DataSet(examples, inputs, attributes, target, attrnames, values)