-
Notifications
You must be signed in to change notification settings - Fork 0
/
digitization.py
89 lines (72 loc) · 2.44 KB
/
digitization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Author : Fatih Kahraman
Mail : fatih.khrmn@hotmail.com
"""
import pandas as pd
class Converter:
    """Encode every column of a CSV file or DataFrame as integer codes.

    The data source is either ``path`` (a comma-separated text file whose
    first line holds the column labels) or ``df`` (a pandas DataFrame).
    When both are supplied, ``path`` takes precedence.

    The whole pipeline runs inside the constructor; afterwards the encoded
    table is available through :meth:`getProcessedDf` and can be written to
    disk with :meth:`recordDf`.

    Within each column, distinct values are numbered ``0, 1, 2, ...`` in the
    order in which they first appear, so the result is reproducible between
    runs. All intermediate data is stored on the instance (never in module
    globals), so column names cannot clash with module-level names and
    several converters can coexist safely.

    Args:
        path: Location of the CSV file to read, or ``None``.
        df: DataFrame to encode when no ``path`` is given.

    Raises:
        ValueError: If neither ``path`` nor ``df`` is provided, or if the
            file at ``path`` is empty (no header line).
    """

    def __init__(self, path=None, df=None):
        if path is None and df is None:
            raise ValueError("Converter needs either a CSV path or a DataFrame")
        self._path = path
        self._df = df
        self._content = []      # raw data lines of the CSV (header excluded)
        self._labels = []       # column labels, in source order
        self._columns = {}      # label -> list of raw values
        self._encoded = []      # one list of integer codes per label
        self._dataframe = None  # encoded table, built by createDf()
        self._path_flag = path is not None
        self.readLabel()
        self.createVeriable()
        self.matchColumn()
        self.convert()
        self.createDf()

    def readLabel(self):
        """Load the column labels (and, for a CSV source, the raw data lines)."""
        if self._path_flag:
            # The context manager guarantees the handle is closed even if
            # reading fails part-way through.
            with open(self._path, 'r') as handle:
                lines = handle.readlines()
            if not lines:
                raise ValueError(f"{self._path} is empty: no header line found")
            self._labels = lines[0].replace('\n', "").split(',')
            self._content = lines[1:]
        else:
            self._labels = list(self._df.columns)

    def createVeriable(self):
        """Create an empty value list for every column label.

        The (misspelled) method name is kept for backward compatibility.
        """
        self._columns = {label: [] for label in self._labels}

    def matchColumn(self):
        """Distribute every row's cells into the per-column value lists."""
        if self._path_flag:
            rows = (line.replace('\n', "").split(',') for line in self._content)
        else:
            rows = self._df.values
        for row in rows:
            # Index explicitly (rather than zip) so that a row shorter than
            # the header still fails loudly instead of being truncated.
            for index, label in enumerate(self._labels):
                self._columns[label].append(row[index])

    def convert(self):
        """Replace each column's values with integer category codes."""
        self._encoded = []
        for label in self._labels:
            code_of = {}
            codes = []
            for value in self._columns[label]:
                # setdefault hands out the next free code the first time a
                # value is seen and returns the existing code afterwards, so
                # every value receives exactly one code in a single pass.
                codes.append(code_of.setdefault(value, len(code_of)))
            self._encoded.append(codes)

    def createDf(self):
        """Assemble the encoded columns into a DataFrame."""
        # Each inner list is one column, so build row-wise and transpose.
        self._dataframe = pd.DataFrame(self._encoded).transpose()
        self._dataframe.columns = self._labels

    def dropColumn(self, columnName):
        """Remove the column named ``columnName`` from the encoded table."""
        del self._dataframe[columnName]

    def recordDf(self):
        """Write the encoded table to ``<source>_digit.csv``.

        For a CSV source the output is placed next to the input, with a
        trailing ``.csv`` extension removed from the input name first; for a
        DataFrame source the file is ``df_digit.csv`` in the working
        directory. The index is not written.
        """
        if self._path_flag:
            path_name = str(self._path)
            # Strip only a trailing extension; a ".csv" occurring elsewhere
            # in the path (e.g. in a directory name) must be left alone.
            if path_name.endswith(".csv"):
                path_name = path_name[:-len(".csv")]
        else:
            path_name = 'df'
        self._dataframe.to_csv(f"{path_name}_digit.csv", index=False)

    def getProcessedDf(self):
        """Return the encoded DataFrame."""
        return self._dataframe