-
Notifications
You must be signed in to change notification settings - Fork 283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Py3k consistent unicode handling #1782
Changes from all commits
8980645
37dc9f2
f52aa8d
199bec1
35d3547
244ced6
b759876
6682bce
a02e863
3ffd5de
33f2e50
9717793
f9cc0f9
875178a
732f6bf
45328f6
9619cdf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -963,10 +963,10 @@ def collapsed(self, dims_to_collapse=None): | |
for index in np.ndindex(shape): | ||
index_slice = (slice(None),) + tuple(index) | ||
bounds.append(serialize(self.bounds[index_slice])) | ||
dtype = np.dtype('S{}'.format(max(map(len, bounds)))) | ||
dtype = np.dtype('U{}'.format(max(map(len, bounds)))) | ||
bounds = np.array(bounds, dtype=dtype).reshape((1,) + shape) | ||
points = serialize(self.points) | ||
dtype = np.dtype('S{}'.format(len(points))) | ||
dtype = np.dtype('U{}'.format(len(points))) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These change the Python 2 behaviour, i.e. if I collapse a cube and get the value of a textual, collapsed coordinate, it will now be an instance of `unicode` rather than `str`. |
||
# Create the new collapsed coordinate. | ||
coord = self.copy(points=np.array(points, dtype=dtype), | ||
bounds=bounds) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1863,7 +1863,7 @@ def vector_summary(vector_coords, cube_header, max_line_offset): | |
if self.attributes: | ||
attribute_lines = [] | ||
for name, value in sorted(six.iteritems(self.attributes)): | ||
value = iris.util.clip_string(unicode(value)) | ||
value = iris.util.clip_string(six.text_type(value)) | ||
line = u'{pad:{width}}{name}: {value}'.format(pad=' ', | ||
width=indent, | ||
name=name, | ||
|
@@ -1893,7 +1893,11 @@ def assert_valid(self): | |
warnings.warn('Cube.assert_valid() has been deprecated.') | ||
|
||
def __str__(self): | ||
return self.summary().encode(errors='replace') | ||
# six has a decorator for this bit, but it doesn't do errors='replace'. | ||
if six.PY3: | ||
return self.summary() | ||
else: | ||
return self.summary().encode(errors='replace') | ||
|
||
def __unicode__(self): | ||
return self.summary() | ||
|
@@ -2302,7 +2306,8 @@ def _as_list_of_coords(self, names_or_coords): | |
Convert a name, coord, or list of names/coords to a list of coords. | ||
""" | ||
# If not iterable, convert to list of a single item | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This comment is a little misleading, since py3k strings have gained an `__iter__` attribute. |
||
if not hasattr(names_or_coords, '__iter__'): | ||
if (not hasattr(names_or_coords, '__iter__') or | ||
isinstance(names_or_coords, str)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could these checks be simplified to just checking if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would also need to check for a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe so - this is just checking whether the input is of type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method takes a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with the new logic in these "iterable but not a string" cases -- i.e. it works and does what we want.
My reasoning is that this test really does just the same thing for either Python2 or 3. So, I'm suggesting we should really check for an instance of "basestring" first (equivalently, "six.string_types" for portability). Anyway, I believe the above form is more consistent with our usage elsewhere. |
||
names_or_coords = [names_or_coords] | ||
|
||
coords = [] | ||
|
@@ -2348,7 +2353,8 @@ def slices_over(self, ref_to_slice): | |
|
||
""" | ||
# Required to handle a mix between types. | ||
if not hasattr(ref_to_slice, '__iter__'): | ||
if (not hasattr(ref_to_slice, '__iter__') or | ||
isinstance(ref_to_slice, str)): | ||
ref_to_slice = [ref_to_slice] | ||
|
||
slice_dims = set() | ||
|
@@ -2408,7 +2414,8 @@ def slices(self, ref_to_slice, ordered=True): | |
raise TypeError("'ordered' argument to slices must be boolean.") | ||
|
||
# Required to handle a mix between types | ||
if not hasattr(ref_to_slice, '__iter__'): | ||
if (not hasattr(ref_to_slice, '__iter__') or | ||
isinstance(ref_to_slice, str)): | ||
ref_to_slice = [ref_to_slice] | ||
|
||
dim_to_slice = [] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,11 +64,13 @@ | |
_load_rules = None | ||
|
||
|
||
CENTRE_TITLES = {'egrr': 'U.K. Met Office - Exeter', | ||
'ecmf': 'European Centre for Medium Range Weather Forecasts', | ||
'rjtd': 'Tokyo, Japan Meteorological Agency', | ||
'55' : 'San Francisco', | ||
'kwbc': 'US National Weather Service, National Centres for Environmental Prediction'} | ||
CENTRE_TITLES = { | ||
'egrr': u'U.K. Met Office - Exeter', | ||
'ecmf': u'European Centre for Medium Range Weather Forecasts', | ||
'rjtd': u'Tokyo, Japan Meteorological Agency', | ||
'55': u'San Francisco', | ||
'kwbc': u'US National Weather Service, National Centres for Environmental ' | ||
u'Prediction'} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a change of behaviour for Python 2. |
||
|
||
TIME_RANGE_INDICATORS = {0:'none', 1:'none', 3:'time mean', 4:'time sum', | ||
5:'time _difference', 10:'none', | ||
|
@@ -449,7 +451,7 @@ def _compute_extra_keys(self): | |
#originating centre | ||
#TODO #574 Expand to include sub-centre | ||
self.extra_keys['_originatingCentre'] = CENTRE_TITLES.get( | ||
centre, "unknown centre %s" % centre) | ||
centre, u'unknown centre %s' % centre) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a change of behaviour for Python 2. |
||
|
||
#forecast time unit as a cm string | ||
#TODO #575 Do we want PP or GRIB style forecast delta? | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,11 +78,11 @@ def read_header(file_handle): | |
header = {} | ||
header['NAME Version'] = next(file_handle).strip() | ||
for line in file_handle: | ||
words = line.split(':', 1) | ||
words = line.split(b':', 1) | ||
if len(words) != 2: | ||
break | ||
key, value = [word.strip() for word in words] | ||
header[key] = value | ||
header[key.decode()] = value | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It applies to most/all of the changes to the NAME loader, but this is a change to the Python 2 behaviour. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the keys are always strings, I'm not sure it makes much difference in this case. Python 2 is very lax, e.g., an ASCII `str` key and the equivalent `unicode` key compare equal and hash identically (`'key' == u'key'`), so dictionary lookups behave the same either way. |
||
|
||
# Cast some values into floats or integers if they match a | ||
# given name. Set any empty string values to None. | ||
|
@@ -97,6 +97,8 @@ def read_header(file_handle): | |
'Number of fields', | ||
'Number of series']: | ||
header[key] = int(value) | ||
else: | ||
header[key] = value.decode() | ||
else: | ||
header[key] = None | ||
|
||
|
@@ -118,7 +120,7 @@ def _read_data_arrays(file_handle, n_arrays, shape): | |
for line in file_handle: | ||
# Split the line by comma, removing the last empty column | ||
# caused by the trailing comma | ||
vals = line.split(',')[:-1] | ||
vals = line.split(b',')[:-1] | ||
|
||
# Cast the x and y grid positions to integers and convert | ||
# them to zero based indices | ||
|
@@ -518,7 +520,7 @@ def load_NAMEIII_field(filename): | |
# Loading a file gives a generator of lines which can be progressed using | ||
# the next() function. This will come in handy as we wish to progress | ||
# through the file line by line. | ||
with open(filename, 'r') as file_handle: | ||
with open(filename, 'rb') as file_handle: | ||
# Create a dictionary which can hold the header metadata about this | ||
# file. | ||
header = read_header(file_handle) | ||
|
@@ -536,7 +538,8 @@ def load_NAMEIII_field(filename): | |
'Vertical Av or Int', 'Prob Perc', | ||
'Prob Perc Ens', 'Prob Perc Time', | ||
'Time', 'Z', 'D']: | ||
cols = [col.strip() for col in next(file_handle).split(',')] | ||
cols = [col.strip() | ||
for col in next(file_handle).decode().split(',')] | ||
column_headings[column_header_name] = cols[4:-1] | ||
|
||
# Convert the time to python datetimes. | ||
|
@@ -588,7 +591,7 @@ def load_NAMEII_field(filename): | |
A generator :class:`iris.cube.Cube` instances. | ||
|
||
""" | ||
with open(filename, 'r') as file_handle: | ||
with open(filename, 'rb') as file_handle: | ||
# Create a dictionary which can hold the header metadata about this | ||
# file. | ||
header = read_header(file_handle) | ||
|
@@ -607,7 +610,8 @@ def load_NAMEII_field(filename): | |
for column_header_name in ['Species Category', 'Species', | ||
'Time Av or Int', 'Quantity', | ||
'Unit', 'Z', 'Time']: | ||
cols = [col.strip() for col in next(file_handle).split(',')] | ||
cols = [col.strip() | ||
for col in next(file_handle).decode().split(',')] | ||
column_headings[column_header_name] = cols[4:-1] | ||
|
||
# Convert the time to python datetimes | ||
|
@@ -667,7 +671,7 @@ def load_NAMEIII_timeseries(filename): | |
A generator :class:`iris.cube.Cube` instances. | ||
|
||
""" | ||
with open(filename, 'r') as file_handle: | ||
with open(filename, 'rb') as file_handle: | ||
# Create a dictionary which can hold the header metadata about this | ||
# file. | ||
header = read_header(file_handle) | ||
|
@@ -683,7 +687,8 @@ def load_NAMEIII_timeseries(filename): | |
'Vertical Av or Int', 'Prob Perc', | ||
'Prob Perc Ens', 'Prob Perc Time', | ||
'Location', 'X', 'Y', 'Z', 'D']: | ||
cols = [col.strip() for col in next(file_handle).split(',')] | ||
cols = [col.strip() | ||
for col in next(file_handle).decode().split(',')] | ||
column_headings[column_header_name] = cols[1:-1] | ||
|
||
# Determine the coordinates of the data and store in namedtuples. | ||
|
@@ -707,10 +712,10 @@ def load_NAMEIII_timeseries(filename): | |
for line in file_handle: | ||
# Split the line by comma, removing the last empty column caused | ||
# by the trailing comma. | ||
vals = line.split(',')[:-1] | ||
vals = line.split(b',')[:-1] | ||
|
||
# Time is stored in the first column. | ||
t = vals[0].strip() | ||
t = vals[0].decode().strip() | ||
dt = datetime.datetime.strptime(t, NAMEIII_DATETIME_FORMAT) | ||
time_list.append(dt) | ||
|
||
|
@@ -741,7 +746,7 @@ def load_NAMEII_timeseries(filename): | |
A generator :class:`iris.cube.Cube` instances. | ||
|
||
""" | ||
with open(filename, 'r') as file_handle: | ||
with open(filename, 'rb') as file_handle: | ||
# Create a dictionary which can hold the header metadata about this | ||
# file. | ||
header = read_header(file_handle) | ||
|
@@ -751,7 +756,8 @@ def load_NAMEII_timeseries(filename): | |
for column_header_name in ['Y', 'X', 'Location', | ||
'Species Category', 'Species', | ||
'Quantity', 'Z', 'Unit']: | ||
cols = [col.strip() for col in next(file_handle).split(',')] | ||
cols = [col.strip() | ||
for col in next(file_handle).decode().split(',')] | ||
column_headings[column_header_name] = cols[1:-1] | ||
|
||
# Determine the coordinates of the data and store in namedtuples. | ||
|
@@ -771,10 +777,10 @@ def load_NAMEII_timeseries(filename): | |
for line in file_handle: | ||
# Split the line by comma, removing the last empty column caused | ||
# by the trailing comma. | ||
vals = line.split(',')[:-1] | ||
vals = line.split(b',')[:-1] | ||
|
||
# Time is stored in the first two columns. | ||
t = (vals[0].strip() + ' ' + vals[1].strip()) | ||
t = (vals[0].strip() + b' ' + vals[1].strip()).decode() | ||
dt = datetime.datetime.strptime( | ||
t, NAMEII_TIMESERIES_DATETIME_FORMAT) | ||
time_list.append(dt) | ||
|
@@ -809,21 +815,22 @@ def load_NAMEIII_trajectory(filename): | |
time_unit = iris.unit.Unit('hours since epoch', | ||
calendar=iris.unit.CALENDAR_GREGORIAN) | ||
|
||
with open(filename, 'r') as infile: | ||
with open(filename, 'rb') as infile: | ||
header = read_header(infile) | ||
|
||
# read the column headings | ||
for line in infile: | ||
if line.startswith(" "): | ||
if line.startswith(b' '): | ||
break | ||
headings = [heading.strip() for heading in line.split(",")] | ||
headings = [heading.strip() for heading in line.decode().split(',')] | ||
|
||
# read the columns | ||
columns = [[] for i in range(len(headings))] | ||
for line in infile: | ||
values = [v.strip() for v in line.split(",")] | ||
values = [v.strip() for v in line.split(b',')] | ||
for c, v in enumerate(values): | ||
if "UTC" in v: | ||
if b'UTC' in v: | ||
v = v.decode() | ||
v = v.replace(":00 ", " ") # Strip out milliseconds. | ||
v = datetime.datetime.strptime(v, NAMEIII_DATETIME_FORMAT) | ||
else: | ||
|
@@ -872,6 +879,8 @@ def load_NAMEIII_trajectory(filename): | |
elif name == "Z (FL)": | ||
name = "flight_level" | ||
long_name = name | ||
elif values[0].dtype.kind == 'S': | ||
values = [v.decode() for v in values] | ||
|
||
try: | ||
coord = DimCoord(values, units=units) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This changes the Python 2 behaviour.