# IPython/Spyder magic to clear the workspace; remove or comment this line when
# running the script with plain `python`.
%reset -f
#----------------------------------------------------
# Chapter One
#----------------------------------------------------
# 1. Pre-Processing (Body Motion Data)
# Parsing
# Data Interpolation/Resizing
# Denoising
# NaNs
# Normalization
# Standardization
#----------------------------------------------------
import numpy as np
import os
from scipy.interpolate import interp1d
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer
import warnings
import time
# Record the start time
start_time = time.time()
# Suppress warnings
warnings.filterwarnings("ignore")
# Parse BVH files
def parse_bvh(file_path):
    """Parse the BVH file to extract the header and the (frames x channels) motion data."""
    print(f"Parsing file: {file_path}")
    with open(file_path, 'r') as file:
        lines = file.readlines()
    header, motion_data = [], []
    capture_data = False
    for line in lines:
        if "MOTION" in line:
            capture_data = True
        elif capture_data:
            stripped = line.strip()
            # Skip the "Frames:" / "Frame Time:" metadata and any blank trailing lines.
            if not stripped or stripped.startswith("Frames") or stripped.startswith("Frame Time"):
                continue
            # One frame of channel values (replaces the deprecated text mode of np.fromstring).
            motion_data.append(np.array(stripped.split(), dtype=float))
        else:
            header.append(line)
    print(f"Finished parsing: {file_path}")
    return header, np.array(motion_data)

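# For reference, a BVH file ends with a MOTION block shaped roughly like:
#   MOTION
#   Frames: 1200
#   Frame Time: 0.008333
#   <channel values for frame 1>
#   <channel values for frame 2>
#   ...
# parse_bvh() above keeps everything before MOTION as the header and turns each
# numeric line after the metadata into one row of the motion array
# (frames x channels). The counts shown are illustrative, not from this dataset.
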
def read_bvh(filename):
    """Read a BVH file and return only its motion data (the header is discarded)."""
    _, motion_data = parse_bvh(filename)
    return motion_data

def interpolate_frames(motion_data, target_frame_count):
    """Linearly interpolate BVH frames to match a target frame count."""
    print(f"Interpolating frames to match target frame count: {target_frame_count}")
    original_frame_count = len(motion_data)
    original_time = np.linspace(0, 1, original_frame_count)
    target_time = np.linspace(0, 1, target_frame_count)
    interpolated_frames = []
    # Interpolate each channel (column) independently along a normalized time axis.
    for channel in np.array(motion_data).T:
        interpolator = interp1d(original_time, channel.astype(float), kind='linear')
        interpolated_frames.append(interpolator(target_time))
    print(f"Finished interpolation: original frame count = {original_frame_count}, "
          f"target frame count = {target_frame_count}")
    return np.array(interpolated_frames).T

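# A minimal sketch of what the interpolation does (toy values, not part of the
# original pipeline): a single-channel clip of 3 frames [0, 2, 4], resized to
# 5 frames, becomes [0, 1, 2, 3, 4]:
#   toy = interpolate_frames(np.array([[0.0], [2.0], [4.0]]), 5)
#   # toy.shape == (5, 1); toy.ravel() -> [0., 1., 2., 3., 4.]
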
def find_max_frames(folder_path):
    """Finds the maximum number of frames among all BVH files in a folder."""
    print(f"Finding maximum number of frames in folder: {folder_path}")
    max_frames = 0
    for filename in os.listdir(folder_path):
        if filename.endswith('.bvh'):
            motion_data = read_bvh(os.path.join(folder_path, filename))
            max_frames = max(max_frames, len(motion_data))
    print(f"Maximum frames found: {max_frames}")
    return max_frames

def process_bvh_files(folder_path, max_frames):
    """Processes each BVH file in the folder after interpolating to the same number of frames."""
    print(f"Processing BVH files in folder: {folder_path}")
    all_features = []
    # Sort the listing so the file order (and hence the positional labels below) is deterministic.
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith('.bvh'):
            print(f"Processing file: {filename}")
            full_path = os.path.join(folder_path, filename)
            motion_data = read_bvh(full_path)
            interpolated_frames = interpolate_frames(motion_data, max_frames)
            all_features.append(interpolated_frames.flatten())  # Flatten each clip into one feature vector
            print(f"Finished processing file: {filename}")
    print(f"Finished processing all BVH files in folder: {folder_path}")
    return np.array(all_features)

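# The resulting matrix has one row per .bvh file and max_frames * num_channels
# columns, i.e. every clip is flattened into a single fixed-length feature vector.
# For example (hypothetical sizes), 158 clips with 500 frames of 69 channels each
# would give a (158, 34500) array.
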
# -------------------------------
# Train Folder
train_folder_path = 'Small Dataset/'
# -------------------------------
# Find maximum frame size in the training data
print("Finding maximum frames in the training dataset...")
max_frames_train = find_max_frames(train_folder_path)
# Process BVH files using the maximum frame size
print("Processing training data...")
all_features_train = process_bvh_files(train_folder_path, max_frames_train)
# Train Labels
print("Assigning labels to the dataset...")
C_Angry = [0] * 42
C_Depressed = [1] * 42
C_Neutral = [2] * 42
C_Proud = [3] * 32
Labels = np.array(C_Angry + C_Depressed + C_Neutral + C_Proud, dtype=np.int32)
print("Labels assigned.")
# Standardize the data
print("Standardizing the data...")
scaler = StandardScaler()
X_standardized = scaler.fit_transform(all_features_train)
print("Data standardized.")
# Normalize the data (min-max scaling)
print("Normalizing the data...")
normalizer = MinMaxScaler()
X_normalized = normalizer.fit_transform(X_standardized)
print("Data normalized.")
# Handle NaN values
print("Handling NaN values...")
imputer = SimpleImputer(strategy='mean')
X_cleaned = imputer.fit_transform(X_normalized)
print("NaN values handled.")
# Final feature set and labels
X = X_cleaned
Y = Labels
# Print completion message
end_time = time.time()
print(f"Pre-processing completed in {end_time - start_time:.2f} seconds.")