-
Notifications
You must be signed in to change notification settings - Fork 2
/
yesTunaCleaner.py
88 lines (74 loc) · 3.1 KB
/
yesTunaCleaner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import random
import os
import pandas as pd
def cleanFile(sourceFile, targetFolderForCleanedFiles):
    """Keep only the rows flagged ``tuna == 1`` and save them as a new CSV.

    The source CSV is expected to provide the columns ``lat``, ``lon``,
    ``sst``, ``chlorophyll`` and ``tuna``. Rows whose ``tuna`` value equals 1
    are written, restricted to exactly those five columns, into
    ``targetFolderForCleanedFiles`` under the source file's own file name.

    Problems (unreadable source, missing columns, failed write) are reported
    on stdout instead of being raised, so a batch run over many files can
    carry on with the next file.

    Args:
        sourceFile: Path of the CSV file to clean.
        targetFolderForCleanedFiles: Folder that receives the cleaned copy;
            it is not created here.

    Returns:
        None.
    """
    keptColumns = ["lat", "lon", "sst", "chlorophyll", "tuna"]

    try:
        dataFrameLatLngSource = pd.read_csv(sourceFile)
    except Exception as e:
        print("An error happened when trying to read the CSV :(", e)
        # Nothing was loaded, so there is nothing to filter or save.
        return

    missingColumns = [column for column in keptColumns
                      if column not in dataFrameLatLngSource.columns]
    if missingColumns:
        print("Error: missing column(s)", missingColumns, "in", sourceFile)
        return

    # ----If tuna == 1 then keep, else drop the row
    tunaOnlyMask = dataFrameLatLngSource["tuna"] == 1
    dataFrameCoba = dataFrameLatLngSource.loc[tunaOnlyMask, keptColumns]

    # Take the file name after the last separator. Both "\\" and "/" are
    # accepted so Windows-style paths are split correctly on any platform,
    # and names of any length survive (no fixed 14-character assumption).
    fileName = os.fspath(sourceFile).replace("\\", "/").rsplit("/", 1)[-1]
    targetPath = os.path.join(targetFolderForCleanedFiles, fileName)

    try:
        dataFrameCoba.to_csv(targetPath, index=False)
    except Exception as e:
        print("Error: failed to save to csv :( \n", e)
    else:
        print("Data saved successfuly to", targetPath)
#---MAIN---#
# Batch driver: collect the CSV files in the source folder, make sure the
# target folder exists, then run cleanFile() on every collected file.

# ---Load Tuna CSV File Folder---#
listOfFiles = []
# "." is the current working directory of the running process (which is not
# necessarily the folder that contains this .py file).
defaultFolderDirectory = "."
folderOfDataToClean = "\\oldPredictedTuna\\"
# Build the folder path with the platform's own separator instead of relying
# on the hard-coded backslashes in folderOfDataToClean.
sourceFolderPath = os.path.join(defaultFolderDirectory,
                                folderOfDataToClean.strip("\\/"))
try:
    # os.listdir() raises when the folder is missing, whereas os.walk() would
    # silently yield nothing and leave the error branch below unreachable.
    folderEntries = sorted(os.listdir(sourceFolderPath))
except OSError as e:
    print("ERROR files not found in: ", os.getcwd())
    print(e)
    print("\nI stopped the app for you :)")
    raise SystemExit(1)
else:
    for entryName in folderEntries:
        # Only CSV files sitting directly in the source folder are cleaned;
        # files in subfolders could not be addressed by bare name anyway.
        isRegularFile = os.path.isfile(
            os.path.join(sourceFolderPath, entryName))
        if isRegularFile and entryName.lower().endswith(".csv"):
            listOfFiles.append(entryName)
    print("\nSuccessfuly loaded List of Files:",
          len(listOfFiles), "files total")
# END OF: Load Tuna CSV File Folder---

# Check if the target folder for the cleaned data exists, if it doesnt then create one
cleanedFolderPath = "oldPredictedTunaCleaned"
if os.path.isdir(cleanedFolderPath):
    print(cleanedFolderPath, "exists")
else:
    print("Doesnt exist, and creating the folder")
    try:
        os.makedirs(cleanedFolderPath)
    except OSError as e:
        print("Failed to created the 'Cleaned' folder path", e)
        # Without the target folder every save would fail, so stop here.
        print("\nI stopped the app for you :)")
        raise SystemExit(1)
    else:
        print("Successfuly created the 'Cleaned' folder path")

# For-loop to clean the files
for fileName in listOfFiles:
    cleanFile(os.path.join(sourceFolderPath, fileName), cleanedFolderPath)
print("Successfully cleaned", len(listOfFiles), "files")