-
Notifications
You must be signed in to change notification settings - Fork 273
/
data_loading.py
executable file
·115 lines (87 loc) · 3.02 KB
/
data_loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""Time-series Generative Adversarial Networks (TimeGAN) Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: April 24th 2020
Code author: Jinsung Yoon (jsyoon0823@gmail.com)
-----------------------------
data_loading.py
(0) MinMaxScaler: Min Max normalizer
(1) sine_data_generation: Generate sine dataset
(2) real_data_loading: Load and preprocess real data
- stock_data: https://finance.yahoo.com/quote/GOOG/history?p=GOOG
- energy_data: http://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
"""
## Necessary Packages
import numpy as np
def MinMaxScaler(data):
    """Min-max normalize `data` along its first axis.

    Each entry has the minimum over axis 0 subtracted and is then divided
    by the (max - min) range over axis 0.

    Args:
      - data: original data (array-like accepted by NumPy reductions)

    Returns:
      - norm_data: normalized data, same shape as `data`
    """
    # Minimum over axis 0; reused for both the shift and the range.
    col_min = np.min(data, 0)
    col_range = np.max(data, 0) - col_min
    # The 1e-7 offset keeps the division finite when max == min; as a
    # consequence the largest value maps to just under 1 rather than 1.
    return (data - col_min) / (col_range + 1e-7)
def sine_data_generation(no, seq_len, dim):
    """Sine data generation.

    Every sample is a multivariate time series in which each feature is a
    sine wave with its own randomly drawn frequency and phase, rescaled
    from [-1, 1] to [0, 1].

    Args:
      - no: the number of samples
      - seq_len: sequence length of the time-series
      - dim: feature dimensions

    Returns:
      - data: generated data, a list of `no` arrays of shape (seq_len, dim)
    """
    # Time indices 0 .. seq_len-1, shared by every sample and feature.
    steps = np.arange(seq_len)

    data = []
    for _ in range(no):
        # One (frequency, phase) pair per feature, both uniform on [0, 0.1).
        # A (dim, 2) draw fills row by row, i.e. freq then phase for each
        # feature in turn, which is the same order as drawing them one at a
        # time, so seeded runs give the same values.
        params = np.random.uniform(0, 0.1, size=(dim, 2))
        freq = params[:, 0]
        phase = params[:, 1]

        # outer(steps, freq)[j, k] == freq[k] * j; adding `phase` broadcasts
        # over the time axis, giving a (seq_len, dim) array directly.
        signal = np.sin(np.outer(steps, freq) + phase)

        # Normalize to [0, 1]
        data.append((signal + 1) * 0.5)

    return data
def real_data_loading(data_name, seq_len):
    """Load a real-world dataset and cut it into shuffled windows.

    The CSV for the requested dataset is read (skipping its header row),
    its row order is reversed, the values are min-max normalized, and the
    result is sliced into overlapping windows of `seq_len` consecutive
    rows which are returned in random order.

    Args:
      - data_name: stock or energy
      - seq_len: sequence length

    Returns:
      - data: preprocessed data, a list of arrays each holding `seq_len`
        consecutive rows of the normalized dataset.
    """
    assert data_name in ['stock', 'energy']

    # Pick the file first so the CSV is parsed by a single call below.
    if data_name == 'stock':
        csv_path = 'data/stock_data.csv'
    elif data_name == 'energy':
        csv_path = 'data/energy_data.csv'
    raw = np.loadtxt(csv_path, delimiter=",", skiprows=1)

    # Reverse the rows so the series runs chronologically
    # (presumably the files are stored newest-first -- confirm against data),
    # then normalize.
    scaled = MinMaxScaler(raw[::-1])

    # Sliding windows with stride 1. Window starts run from 0 up to
    # len(scaled) - seq_len - 1, so there are len(scaled) - seq_len windows.
    n_windows = len(scaled) - seq_len
    windows = [scaled[start:start + seq_len] for start in range(0, n_windows)]

    # Shuffle the windows so consecutive samples are closer to i.i.d.
    order = np.random.permutation(len(windows))
    return [windows[pos] for pos in order]