Skip to content

Commit

Permalink
CLN: Replace comprehensions list/set/dict functions with corresponding symbols (#18383)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and jreback committed Nov 24, 2017
1 parent aec3347 commit 4fce784
Show file tree
Hide file tree
Showing 51 changed files with 160 additions and 172 deletions.
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def setup(self):
self.some_dict = list(self.data.values())[0]
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]

self.data2 = dict(
((i, dict(((j, float(j)) for j in range(100)))) for i in
range(2000)))
self.data2 = {i: {j: float(j) for j in range(100)}
for i in range(2000)}


def time_frame_ctor_list_of_dict(self):
DataFrame(self.dict_list)
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _setup(self):
self.N = 100000
self.C = 5
self.index = date_range('20000101', periods=self.N, freq='H')
self.df = DataFrame(dict(('float{0}'.format(i), randn(self.N)) for i in range(self.C)), index=self.index)
self.df = DataFrame({'float{0}'.format(i): randn(self.N) for i in range(self.C)}, index=self.index)
self.df2 = self.df.copy()
self.df2['object'] = [('%08x' % randrange((16 ** 8))) for _ in range(self.N)]
self.remove(self.f)
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class replace_large_dict(object):
def setup(self):
self.n = (10 ** 6)
self.start_value = (10 ** 5)
self.to_rep = dict(((i, (self.start_value + i)) for i in range(self.n)))
self.to_rep = {i: self.start_value + i for i in range(self.n)}
self.s = Series(np.random.randint(self.n, size=(10 ** 3)))

def time_replace_large_dict(self):
Expand All @@ -35,8 +35,8 @@ class replace_convert(object):

def setup(self):
self.n = (10 ** 3)
self.to_ts = dict(((i, pd.Timestamp(i)) for i in range(self.n)))
self.to_td = dict(((i, pd.Timedelta(i)) for i in range(self.n)))
self.to_ts = {i: pd.Timestamp(i) for i in range(self.n)}
self.to_td = {i: pd.Timedelta(i) for i in range(self.n)}
self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)),
'B': np.random.randint(self.n, size=(10 ** 3))})
Expand Down
4 changes: 2 additions & 2 deletions doc/sphinxext/numpydoc/phantom_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def import_phantom_module(xml_file):
# Sort items so that
# - Base classes come before classes inherited from them
# - Modules come before their contents
all_nodes = dict((n.attrib['id'], n) for n in root)
all_nodes = {n.attrib['id']: n for n in root}

def _get_bases(node, recurse=False):
bases = [x.attrib['ref'] for x in node.findall('base')]
if recurse:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ from np_datetime cimport (pandas_datetimestruct,
_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
_int_to_month = {(k + 1): v for k, v in enumerate(_MONTHS)}
_month_to_int = dict((v, k) for k, v in _int_to_month.items())
_month_to_int = {v: k for k, v in _int_to_month.items()}


class WeekDay(object):
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ _ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR

DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS))
_weekday_rule_aliases = {k: v for k, v in enumerate(DAYS)}

_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
Expand Down
6 changes: 3 additions & 3 deletions pandas/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,11 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set(r.strip() for r in refnames.strip("()").split(","))
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set(r[len(TAG):] for r in refs if r.startswith(TAG))
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
Expand All @@ -154,7 +154,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set(r for r in refs if re.search(r'\d', r))
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '{}', no digits".format(",".join(refs - tags)))
if verbose:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def map_indices_py(arr):
Returns a dictionary with (element, index) pairs for each element in the
given array/list
"""
return dict((x, i) for i, x in enumerate(arr))
return {x: i for i, x in enumerate(arr)}


def union(*seqs):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def _concat_datetimetz(to_concat, name=None):
it is used in DatetimeIndex.append also
"""
# do not pass tz to set because tzlocal cannot be hashed
if len(set(str(x.dtype) for x in to_concat)) != 1:
if len({str(x.dtype) for x in to_concat}) != 1:
raise ValueError('to_concat must have the same tz')
tz = to_concat[0].tz
# no need to localize because internal repr will not be changed
Expand Down Expand Up @@ -525,7 +525,7 @@ def convert_sparse(x, axis):
if len(typs) == 1:
# concat input as it is if all inputs are sparse
# and have the same fill_value
fill_values = set(c.fill_value for c in to_concat)
fill_values = {c.fill_value for c in to_concat}
if len(fill_values) == 1:
sp_values = [c.sp_values for c in to_concat]
indexes = [c.sp_index.to_int_index() for c in to_concat]
Expand Down
15 changes: 7 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
elif isinstance(data, (np.ndarray, Series, Index)):
if data.dtype.names:
data_columns = list(data.dtype.names)
data = dict((k, data[k]) for k in data_columns)
data = {k: data[k] for k in data_columns}
if columns is None:
columns = data_columns
mgr = self._init_dict(data, index, columns, dtype=dtype)
Expand Down Expand Up @@ -417,8 +417,7 @@ def _init_dict(self, data, index, columns, dtype=None):
extract_index(list(data.values()))

# prefilter if columns passed
data = dict((k, v) for k, v in compat.iteritems(data)
if k in columns)
data = {k: v for k, v in compat.iteritems(data) if k in columns}

if index is None:
index = extract_index(list(data.values()))
Expand Down Expand Up @@ -3895,7 +3894,7 @@ def f(col):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

result = dict((col, f(col)) for col in this)
result = {col: f(col) for col in this}

# non-unique
else:
Expand All @@ -3906,7 +3905,7 @@ def f(i):
return self._constructor_sliced(r, index=new_index,
dtype=r.dtype)

result = dict((i, f(i)) for i, col in enumerate(this.columns))
result = {i: f(i) for i, col in enumerate(this.columns)}
result = self._constructor(result, index=new_index, copy=False)
result.columns = new_columns
return result
Expand Down Expand Up @@ -3984,7 +3983,7 @@ def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):
if self.columns.is_unique:

def _compare(a, b):
return dict((col, func(a[col], b[col])) for col in a.columns)
return {col: func(a[col], b[col]) for col in a.columns}

new_data = expressions.evaluate(_compare, str_rep, self, other)
return self._constructor(data=new_data, index=self.index,
Expand All @@ -3993,8 +3992,8 @@ def _compare(a, b):
else:

def _compare(a, b):
return dict((i, func(a.iloc[:, i], b.iloc[:, i]))
for i, col in enumerate(a.columns))
return {i: func(a.iloc[:, i], b.iloc[:, i])
for i, col in enumerate(a.columns)}

new_data = expressions.evaluate(_compare, str_rep, self, other)
result = self._constructor(data=new_data, index=self.index,
Expand Down
24 changes: 12 additions & 12 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None,
"""

cls._AXIS_ORDERS = axes
cls._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axes))
cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)}
cls._AXIS_LEN = len(axes)
cls._AXIS_ALIASES = aliases or dict()
cls._AXIS_IALIASES = dict((v, k) for k, v in cls._AXIS_ALIASES.items())
cls._AXIS_IALIASES = {v: k for k, v in cls._AXIS_ALIASES.items()}
cls._AXIS_NAMES = dict(enumerate(axes))
cls._AXIS_SLICEMAP = slicers or None
cls._AXIS_REVERSED = axes_are_reversed
Expand Down Expand Up @@ -279,21 +279,21 @@ def set_axis(a, i):

def _construct_axes_dict(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = dict((a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS))
d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
d.update(kwargs)
return d

@staticmethod
def _construct_axes_dict_from(self, axes, **kwargs):
"""Return an axes dictionary for the passed axes."""
d = dict((a, ax) for a, ax in zip(self._AXIS_ORDERS, axes))
d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)}
d.update(kwargs)
return d

def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
"""Return an axes dictionary for myself."""
d = dict((self._AXIS_SLICEMAP[a], self._get_axis(a))
for a in (axes or self._AXIS_ORDERS))
d = {self._AXIS_SLICEMAP[a]: self._get_axis(a)
for a in (axes or self._AXIS_ORDERS)}
d.update(kwargs)
return d

Expand Down Expand Up @@ -329,7 +329,7 @@ def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
raise TypeError("not enough/duplicate arguments "
"specified!")

axes = dict((a, kwargs.pop(a, None)) for a in self._AXIS_ORDERS)
axes = {a: kwargs.pop(a, None) for a in self._AXIS_ORDERS}
return axes, kwargs

@classmethod
Expand Down Expand Up @@ -1172,7 +1172,7 @@ def to_dense(self):
# Picklability

def __getstate__(self):
meta = dict((k, getattr(self, k, None)) for k in self._metadata)
meta = {k: getattr(self, k, None) for k in self._metadata}
return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata,
**meta)

Expand Down Expand Up @@ -4277,8 +4277,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
elif self.ndim == 3:

# fill in 2d chunks
result = dict((col, s.fillna(method=method, value=value))
for col, s in self.iteritems())
result = {col: s.fillna(method=method, value=value)
for col, s in self.iteritems()}
new_obj = self._constructor.\
from_dict(result).__finalize__(self)
new_data = new_obj._data
Expand Down Expand Up @@ -5681,7 +5681,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
# this means other is a DataFrame, and we need to broadcast
# self
cons = self._constructor_expanddim
df = cons(dict((c, self) for c in other.columns),
df = cons({c: self for c in other.columns},
**other._construct_axes_dict())
return df._align_frame(other, join=join, axis=axis,
level=level, copy=copy,
Expand All @@ -5691,7 +5691,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
# this means self is a DataFrame, and we need to broadcast
# other
cons = other._constructor_expanddim
df = cons(dict((c, other) for c in self.columns),
df = cons({c: other for c in self.columns},
**self._construct_axes_dict())
return self._align_frame(df, join=join, axis=axis, level=level,
copy=copy, fill_value=fill_value,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3840,7 +3840,7 @@ def first_not_none(values):
# if all series have a consistent name. If the
# series do not have a consistent name, do
# nothing.
names = set(v.name for v in values)
names = {v.name for v in values}
if len(names) == 1:
index.name = list(names)[0]

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ def _coerce_to_ndarray(cls, data):

def _get_attributes_dict(self):
""" return an attributes dict for my class """
return dict((k, getattr(self, k, None)) for k in self._attributes)
return {k: getattr(self, k, None) for k in self._attributes}

def view(self, cls=None):

Expand Down Expand Up @@ -1784,7 +1784,7 @@ def append(self, other):
if not isinstance(obj, Index):
raise TypeError('all inputs must be Index')

names = set(obj.name for obj in to_concat)
names = {obj.name for obj in to_concat}
name = None if len(names) > 1 else self.name

return self._concat(to_concat, name)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ def _concat_same_dtype(self, to_concat, name):
assert that we all have the same .closed
we allow a 0-len index here as well
"""
if not len(set(i.closed for i in to_concat if len(i))) == 1:
if not len({i.closed for i in to_concat if len(i)}) == 1:
msg = ('can only append two IntervalIndex objects '
'that are closed on the same side')
raise ValueError(msg)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3377,7 +3377,7 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False,
blocks.append(block)

# note that some DatetimeTZ, Categorical are always ndim==1
ndim = set(b.ndim for b in blocks)
ndim = {b.ndim for b in blocks}

if 2 in ndim:

Expand Down Expand Up @@ -3891,7 +3891,7 @@ def get_scalar(self, tup):
"""
Retrieve single item
"""
full_loc = list(ax.get_loc(x) for ax, x in zip(self.axes, tup))
full_loc = [ax.get_loc(x) for ax, x in zip(self.axes, tup)]
blk = self.blocks[self._blknos[full_loc[0]]]
values = blk.values

Expand Down Expand Up @@ -4871,7 +4871,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
if _can_consolidate:

if dtype is None:
if len(set(b.dtype for b in blocks)) != 1:
if len({b.dtype for b in blocks}) != 1:
raise AssertionError("_merge_blocks are invalid!")
dtype = blocks[0].dtype

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def names(x):
construct_result=_construct_divmod_result,
)

new_methods = dict((names(k), v) for k, v in new_methods.items())
new_methods = {names(k): v for k, v in new_methods.items()}
return new_methods


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/panelnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _combine_with_constructor(self, other, func):
new_axes.append(getattr(self, a).union(getattr(other, a)))

# reindex: could check that everything's the same size, but forget it
d = dict((a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes))
d = {a: ax for a, ax in zip(self._AXIS_ORDERS, new_axes)}
d['copy'] = False
this = self.reindex(**d)
other = other.reindex(**d)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
names = list(names)
else:
# make sure that all of the passed indices have the same nlevels
if not len(set(idx.nlevels for idx in indexes)) == 1:
if not len({idx.nlevels for idx in indexes}) == 1:
raise AssertionError("Cannot concat indices that do"
" not have the same number of levels")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def lreshape(data, groups, dropna=True, label=None):
for c in pivot_cols:
mask &= notna(mdata[c])
if not mask.all():
mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata))
mdata = {k: v[mask] for k, v in compat.iteritems(mdata)}

from pandas import DataFrame
return DataFrame(mdata, columns=id_cols + pivot_cols)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ def _init_dict(self, data, index, columns, dtype=None):
# pre-filter out columns if we passed it
if columns is not None:
columns = _ensure_index(columns)
data = dict((k, v) for k, v in compat.iteritems(data)
if k in columns)
data = {k: v for k, v in compat.iteritems(data) if k in columns}
else:
columns = Index(_try_sort(list(data.keys())))

Expand Down Expand Up @@ -173,7 +172,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
""" Init self from ndarray or list of lists """
data = _prep_ndarray(data, copy=False)
index, columns = self._prep_index(data, index, columns)
data = dict((idx, data[:, i]) for i, idx in enumerate(columns))
data = {idx: data[:, i] for i, idx in enumerate(columns)}
return self._init_dict(data, index, columns, dtype)

def _init_spmatrix(self, data, index, columns, dtype=None,
Expand Down Expand Up @@ -307,7 +306,7 @@ def to_dense(self):
-------
df : DataFrame
"""
data = dict((k, v.to_dense()) for k, v in compat.iteritems(self))
data = {k: v.to_dense() for k, v in compat.iteritems(self)}
return DataFrame(data, index=self.index, columns=self.columns)

def _apply_columns(self, func):
Expand Down Expand Up @@ -697,7 +696,7 @@ def _reindex_columns(self, columns, method, copy, level, fill_value=None,
raise NotImplementedError("'method' argument is not supported")

# TODO: fill value handling
sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns)
sdict = {k: v for k, v in compat.iteritems(self) if k in columns}
return self._constructor(
sdict, index=self.index, columns=columns,
default_fill_value=self._default_fill_value).__finalize__(self)
Expand Down
Loading

0 comments on commit 4fce784

Please sign in to comment.