-
Notifications
You must be signed in to change notification settings - Fork 2
/
ts_window.py
148 lines (122 loc) · 5.84 KB
/
ts_window.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
'''
Created on August 17, 2020
@author: hmo (hyunho.mo@unitn.it)
'''
import logging
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import preprocessing
class ts_win(object):
'''
class for time series window application
'''
def __init__(self):
'''
Constructor
@param none
'''
self.__logger = logging.getLogger('application of time series window for preparing network input')
## function to reshape features into (samples, time steps, features)
@staticmethod
def gen_sequence(id_df, seq_length, seq_cols):
""" Only sequences that meet the window-length are considered, no padding is used. This means for testing
we need to drop those which are below the window-length. An alternative would be to pad sequences so that
we can use shorter ones """
# for one id I put all the rows in a single matrix
data_matrix = id_df[seq_cols].values
num_elements = data_matrix.shape[0]
# Iterate over two lists in parallel.
# For example, id1 has 192 rows and sequence_length is equal to 50,
# so zip iterates over the following two lists of numbers: (0, 142) and (50, 192)
# 0 50 -> from row 0 to row 50
# 1 51 -> from row 1 to row 51