-
Notifications
You must be signed in to change notification settings - Fork 6
/
hobo.py
209 lines (163 loc) · 6.94 KB
/
hobo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""
Python package for working with HOBO data logger data.
Specifically, this has been tested with CSV exports from Onset HOBO U23-001
data loggers. It may or may not handle data from other HOBO loggers, though
HoboWare likely produces a compatible CSV format for other similar hardware.
License: As a work of the United States Government, this project is in the
public domain within the United States.
2016-02-24 David A. Riggs, Physical Science Tech, Lava Beds National Monument
"""
from __future__ import print_function
import sys
import re
import csv
from datetime import datetime, timedelta, tzinfo
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
__version__ = '0.0.2-dev'
__all__ = 'HoboCSVReader',
TIME_FMTS = [
'%m/%d/%y %I:%M:%S %p', # Hoboware export
'%Y-%m-%d %H:%M:%S', # HOBO MX2301
'%m/%d/%Y %H:%M' # Excel edit and save (*thanks* Microsoft)
]
TZ_REGEX = re.compile('GMT\s?[-+]\d\d:\d\d')
SN_REGEX = re.compile('(?:LGR S/N: |Serial Number:)(\d+)')
class TZFixedOffset(tzinfo):
"""
A fixed-offset timezone implementation for HOBO format `GMT-07:00`.
"""
def __init__(self, offset):
if type(offset) in (int, float):
self.offset_hrs = offset
elif type(offset) == str:
if not TZ_REGEX.match(offset) or offset[-2:] != '00':
raise ValueError(offset)
self.offset_hrs = int(offset[-6:-3]) # extract whole hour and sign
else:
raise ValueError(offset)
self.offset = timedelta(hours=self.offset_hrs)
def utcoffset(self, dt):
return self.offset
def dst(self, dt):
return timedelta(0)
def tzname(self, dt):
return str(self)
def __str__(self):
return 'GMT%+03d:00' % self.offset_hrs
def __eq__(self, other):
return other is not None and self.offset == other.offset
def __repr__(self):
return str(self)
def timestamp(s, tz=None):
"""Parse a HOBO timestamp value to Python DateTime"""
for fmt in TIME_FMTS:
try:
dt = datetime.strptime(s, fmt)
return dt.replace(tzinfo=tz) if tz else dt
except ValueError as e:
pass
raise ValueError('time data "%s" does not match formats: %s' % (s, ', '.join(TIME_FMTS)))
class HoboCSVReader(object):
"""
Iterator over a HOBO CSV file, produces (timestamp, temperature, RH,
battery) rows.
:param str fname: CSV filename
:param tzinfo as_timezone: explicit timezone to cast timestamps to
:param bool strict: whether we should be strict or lenient in parsing CSV
:raises Exception: if this doesn't appear to be a HOBOware or BoxCar exported CSV
:raises ValueError: if required columns representing timestamp or temperature can't be located
:ivar str fname:
:ivar str title:
:ivar str sn:
:ivar tzinfo tz:
:ivar tzinfo as_timezone:
"""
def __init__(self, fname, as_timezone=None, strict=True):
self.fname = fname
self._f = open(fname, 'rt')
self._itimestamp, self._itemp, self._irh, self._ibatt, self.title, self.sn = None, None, None, None, None, None
header = self._find_headers()
if self._itimestamp is None:
raise ValueError('Unable to find required timestamp column!')
if self._itemp is None:
raise ValueError('Unable to find required temperature column!')
tz_match = TZ_REGEX.search(header)
self.tz = TZFixedOffset(tz_match.group()) if tz_match else None
self.as_timezone = TZFixedOffset(as_timezone) if type(as_timezone) in (int, float, str) else as_timezone
self._reader = csv.reader(self._f, strict=strict)
def _find_col_timestamp(self, headers):
for i, header in enumerate(headers):
if 'Date Time' in header:
return i
def _find_col_temperature(self, headers):
for i, header in enumerate(headers):
if 'High Res. Temp.' in header or 'High-Res Temp' in header:
return i
for i, header in enumerate(headers):
for s in ('Temp,', 'Temp.', 'Temperature'):
if s in header:
return i
def _find_col_rh(self, headers):
for i, header in enumerate(headers):
if 'RH,' in header:
return i
def _find_col_battery(self, headers):
for i, header in enumerate(headers):
if 'Batt, V' in header:
return i
def _find_columns(self, header):
"""Find and set integer index for (timestamp, temp, RH, battery) as private ivars"""
headers = next(csv.reader(StringIO(header)))
self._itimestamp = self._find_col_timestamp(headers)
self._itemp = self._find_col_temperature(headers)
self._irh = self._find_col_rh(headers)
self._ibatt = self._find_col_battery(headers)
def _find_headers(self):
while self._itimestamp is None:
header = next(self._f)
if self.title is None:
self.title = header.strip() # TODO: rip out "Plot Title:"
if self.sn is None:
sn_match = SN_REGEX.search(header)
self.sn = sn_match.groups()[0] if sn_match else None
self._find_columns(header)
return header
def __iter__(self):
"""
Iterator for accessing the actual CSV rows.
:return: yields (timestamp, temperature, RH, battery)
:rtype: tuple(datetime, float, float, float)
"""
for row in self._reader:
if not row[self._itemp].strip(): # is this too lenient?
continue # skip event-only rows
if not row[0].strip():
continue # skip blank rows
ts = timestamp(row[self._itimestamp], self.tz)
if self.as_timezone:
ts = ts.astimezone(self.as_timezone)
temp = float(row[self._itemp])
rh = float(row[self._irh]) if self._irh is not None and row[self._irh] else None
batt = float(row[self._ibatt]) if self._ibatt is not None else None
yield ts, temp, rh, batt
def unzip(self):
"""
Rather than iterate row-by-row, return individual column lists
(timestamps, temperatures, RHs, batts)
:rtype: tuple of lists (timestamps, temperatures, RHs, batts)
"""
return zip(*[row for row in self])
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self._f.close()
if __name__ == '__main__':
if len(sys.argv) < 2:
print('usage: %s CSVFILE' % sys.argv[0], file=sys.stderr)
sys.exit(2)
with HoboCSVReader(sys.argv[1]) as reader:
for row in reader:
print(row)