-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremove_bad_characters.py
41 lines (38 loc) · 1.91 KB
/
remove_bad_characters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import encodemaptoimage as emi
# Allow-list of characters that survive cleaning: ASCII letters, digits, the
# space, ASCII punctuation, and the newline / carriage-return / tab control
# characters. Stored as a list with one single-character string per entry.
list_of_english_characters = list(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " "
    "!\"#$%&'()*+,-./:;<=>?@"
    "[\\]^_`{|}~"
    "\n\r\t"
)
def main():
    """Strip every character outside the allow-list from dataset map files.

    For each folder under ``./Datasets/Train`` the map file name is obtained
    from ``emi.get_map_file``. The file is decoded as UTF-8 with undecodable
    bytes dropped, every character not present in
    ``list_of_english_characters`` is removed, and the file is rewritten in
    place when its bytes changed.

    Raises:
        OSError: if the dataset directory or a map file cannot be read or
            written.
    """
    path = "./Datasets/Train"
    print(path)
    # Build the lookup once: set membership is O(1), a list scan is not.
    allowed = frozenset(list_of_english_characters)
    for i, folder in enumerate(os.listdir(path)):
        print(folder)
        folder_path = path + "/" + folder
        osu_file = emi.get_map_file(folder_path)
        print(osu_file)
        file_path = folder_path + "/" + osu_file
        # Binary mode avoids newline translation, so "\r\n" line endings
        # survive the read/write round trip unchanged.
        with open(file_path, "rb") as f:
            raw = f.read()
        # errors="ignore" drops byte sequences that are not valid UTF-8
        # instead of raising, so files with encoding errors are cleaned
        # rather than skipped.
        text = raw.decode("utf-8", errors="ignore")
        cleaned = "".join(ch for ch in text if ch in allowed)
        cleaned_bytes = cleaned.encode("utf-8")
        # Only touch the file when something was actually removed.
        if cleaned_bytes != raw:
            with open(file_path, "wb") as f:
                f.write(cleaned_bytes)
        # NOTE(review): this stops after the first two folders; it looks like
        # a leftover debugging limit -- confirm before removing.
        if i > 0:
            break
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()