forked from tesladdicts/testatus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
json2s3.py
executable file
·86 lines (77 loc) · 3.04 KB
/
json2s3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
""" Reformat json files from tesla-parser to match Kinesis in S3
Reformat json files created by tesla-parser into a directory
structure and file format similar to AWS Kinesis. The created
files can then be uploaded to S3.
"""
import time
import json
import argparse
import re
import secrets
import os
import errno
# from time import strftime
from sys import stdin
def main(argv=None, lines=None):
    """Split tesla-parser output into Kinesis-style, time-bucketed files.

    Each input line is written unchanged to a file under
    ``<streamname>/YYYY/MM/DD/HH/`` (UTC, derived from the line's epoch
    timestamp).  A new file is started whenever the ``--mins`` bucket of
    the timestamp changes, and the path of every file created is printed
    to stdout.

    A line's timestamp is taken from:

    * the ``retrevial_time`` key (spelled that way in the input data) when
      the line starts with ``{`` and is therefore parsed as JSON;
    * a 10-digit epoch when the line starts with ``#``, found either
      directly after ``# `` or at the end of the line after ``) at ``.

    Any other line inherits the timestamp of the line before it.

    Args:
        argv: Command-line arguments to parse; ``None`` means
            ``sys.argv[1:]``.
        lines: Iterable of input lines; ``None`` means standard input.

    Raises:
        SystemExit: With status 1 (after printing the offending line) when
            a ``#`` line holds no recognisable timestamp or when input
            starts with a line that carries no timestamp at all; with
            status 2 (from argparse) when the options are invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--streamname', help='Name of stream',
                        required=True)
    parser.add_argument('--mins',
                        help="minutes in each file (default=5, max=60)",
                        default=5, type=int)
    args = parser.parse_args(argv)
    # Zero would divide by zero below and negatives give nonsense buckets.
    # Values above 60 are harmless: the bucket is computed within the hour,
    # so they simply behave like 60 (one file per hour).
    if args.mins < 1:
        parser.error('--mins must be at least 1')
    if lines is None:
        lines = stdin

    # Compiled once; they are applied to every comment line.
    leading_ts = re.compile(r"^# (\d{10})[\. ]")
    trailing_ts = re.compile(r"\) at (\d{10}$)")

    tstamp = None
    lastfilebase = ''
    outfile = None
    try:
        for line in lines:
            # Find the timestamp for this line.
            if line[:1] == '{':
                record = json.loads(line)
                tstamp = int(record['retrevial_time'])
            elif line[:1] == '#':
                ts = leading_ts.match(line) or trailing_ts.search(line)
                if not ts:
                    print(line)
                    raise SystemExit(1)
                tstamp = int(ts.group(1))
            if tstamp is None:
                # Nothing has supplied a timestamp yet, so this line
                # cannot be placed in any bucket.
                print(line)
                raise SystemExit(1)

            when = time.gmtime(tstamp)
            # Roll the minutes back to the start of the --mins bucket.
            minutes = when.tm_min - (when.tm_min % args.mins)

            # Build the file path: <streamname>/YYYY/MM/DD/HH
            hourstamp = time.strftime('%Y-%m-%d-%H', when)
            outfiledir = os.path.join(args.streamname,
                                      *hourstamp.split('-'))
            # outfilebase is the first part of the file name, before the
            # random suffix is added.
            outfilebase = '{}-1-{}-{:02d}-00'.format(
                args.streamname, hourstamp, minutes)

            if lastfilebase != outfilebase:
                # Time to create a new file.
                if outfile is not None:
                    outfile.close()
                    outfile = None
                # Random suffix of hex groups sized 8-4-4-4-12.
                suffix = '-'.join(secrets.token_hex(size)
                                  for size in (4, 2, 2, 2, 6))
                outfilename = '{}-{}'.format(outfilebase, suffix)
                # exist_ok avoids the check-then-create race without
                # hand-written errno handling.
                os.makedirs(outfiledir, exist_ok=True)
                outfilepath = os.path.join(outfiledir, outfilename)
                outfile = open(outfilepath, "w")
                lastfilebase = outfilebase
                print(outfilepath)
            outfile.write(line)
    finally:
        # Flush and release the last file even if a line failed to parse.
        if outfile is not None:
            outfile.close()
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()