forked from yscacaca/DeepSense
-
Notifications
You must be signed in to change notification settings - Fork 4
/
har_tfrecord_util.py
88 lines (70 loc) · 3.13 KB
/
har_tfrecord_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import tensorflow as tf
import numpy as np
# todo: change the path to your own data folder path
# Output directory for the generated .tfrecord files.
TF_RECORD_PATH = r'/home/username/pycharm/DeepSense/sepHARData_a'
# Root data directory; expected to contain 'train' and 'eval' subfolders of CSVs.
DATA_FOLDER_PATH = r'/home/username/pycharm/DeepSense/sepHARData_a'
# NOTE(review): name is a typo of "SPECTRAL"; kept as-is since other modules may import it.
# Presumably the number of spectral samples kept per sensor axis — TODO confirm.
SEPCTURAL_SAMPLES = 10
# Per-timestep feature width: samples x 6 (presumably 3 acc + 3 gyro axes) x 2
# (presumably real/imag FFT parts) — TODO confirm against the data generator.
FEATURE_DIM = SEPCTURAL_SAMPLES*6*2
# Number of time steps per example window.
WIDE = 20
# Output (label) dimension, i.e. number of activity classes.
OUT_DIM = 6  # originally len(idDict)
BATCH_SIZE = 64
def csv_to_example(fname):
    """Read one CSV sample file and pack it into a ``tf.train.Example``.

    The file is expected to hold a single flat comma-separated row: the
    first ``WIDE * FEATURE_DIM`` values are the sensor features, and the
    remaining values are the (one-hot) label vector.

    Args:
        fname: Path to the comma-separated sample file.

    Returns:
        A ``tf.train.Example`` proto with float features keyed by
        ``'example'`` (flat features) and ``'label'``.
    """
    row = np.loadtxt(fname, delimiter=',')
    features = row[:WIDE * FEATURE_DIM]
    label = row[WIDE * FEATURE_DIM:]
    # Bug fix: the original called tf.reshape(features, ...) here, which
    # yields a symbolic graph Tensor; tf.train.FloatList(value=...) needs a
    # flat sequence of Python/numpy floats and cannot serialize a Tensor.
    # FloatList is inherently 1-D, so no reshape is needed before writing;
    # the consumer reshapes to (WIDE, FEATURE_DIM) after parsing.
    example = tf.train.Example(features=tf.train.Features(feature={
        "label": tf.train.Feature(float_list=tf.train.FloatList(value=label)),
        'example': tf.train.Feature(float_list=tf.train.FloatList(value=features))
    }))
    return example
def read_and_decode(tfrec_path):
    """Build graph ops that read and parse one serialized Example.

    Uses the TF1 queue-runner input machinery: a filename queue feeds a
    ``TFRecordReader`` whose serialized records are parsed into fixed-length
    float features.

    Args:
        tfrec_path: Path to a single .tfrecord file.

    Returns:
        Tuple ``(example, label)`` of 1-D float32 tensors with shapes
        ``[WIDE * FEATURE_DIM]`` and ``[OUT_DIM]`` respectively.
    """
    queue = tf.train.string_input_producer([tfrec_path])
    reader = tf.TFRecordReader()
    _, serialized = reader.read(queue)
    feature_spec = {
        'label': tf.FixedLenFeature([OUT_DIM], tf.float32),
        'example': tf.FixedLenFeature([WIDE*FEATURE_DIM], tf.float32),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)
    return parsed['example'], parsed['label']
def input_pipeline_har(tfrec_path, batch_size, shuffle_sample=True, num_epochs=None):
    """Build a batched input pipeline from a TFRecord file.

    Args:
        tfrec_path: Path to the .tfrecord file to read.
        batch_size: Number of examples per output batch.
        shuffle_sample: If True, use a shuffling batch queue.
        num_epochs: Kept for interface compatibility; currently unused
            (the filename queue in read_and_decode cycles indefinitely).

    Returns:
        Tuple ``(example_batch, label_batch)`` with shapes
        ``[batch_size, WIDE, FEATURE_DIM]`` and ``[batch_size, OUT_DIM]``.
    """
    example, label = read_and_decode(tfrec_path)
    # Fix: the original tf.expand_dims(example, 0) was a no-op — the very
    # next reshape discards the added axis — so it is removed.
    example = tf.reshape(example, shape=(WIDE, FEATURE_DIM))
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3 * batch_size
    if shuffle_sample:
        example_batch, label_batch = tf.train.shuffle_batch(
            [example, label], batch_size=batch_size, num_threads=16,
            capacity=capacity, min_after_dequeue=min_after_dequeue)
    else:
        # Pass capacity explicitly for consistency with the shuffle branch
        # (the original fell back to tf.train.batch's small default of 32).
        example_batch, label_batch = tf.train.batch(
            [example, label], batch_size=batch_size, num_threads=16,
            capacity=capacity)
    return example_batch, label_batch
def _write_split_tfrecord(split_name):
    """Convert every CSV in DATA_FOLDER_PATH/<split_name> into one TFRecord.

    Writes TF_RECORD_PATH/<split_name>.tfrecord containing one serialized
    Example per .csv file found (non-recursively) in the split directory.

    Args:
        split_name: Subfolder name, e.g. 'train' or 'eval'.
    """
    out_path = os.path.join(TF_RECORD_PATH, split_name + '.tfrecord')
    writer = tf.python_io.TFRecordWriter(out_path)
    try:
        split_dir = os.path.join(DATA_FOLDER_PATH, split_name)
        for fname in os.listdir(split_dir):
            # splitext handles names with zero or multiple dots; the
            # original f.split('.') two-target unpack raised ValueError
            # on such names.
            if os.path.splitext(fname)[1] == '.csv':
                example = csv_to_example(os.path.join(split_dir, fname))
                writer.write(example.SerializeToString())
    finally:
        # Close even if a CSV fails to parse, so the file is flushed.
        writer.close()

def main(_):
    """Build train.tfrecord and eval.tfrecord from the CSV data folders."""
    # The train/eval loops were previously duplicated verbatim; both now
    # go through one helper.
    _write_split_tfrecord('train')
    _write_split_tfrecord('eval')

if __name__ == '__main__':
    tf.app.run()