Changed x.__class__ to type(x)

pandas-dev · Nov 26, 2019 · 7816ae5 · 7816ae5
1 parent 6332b1e
commit 7816ae5
Show file tree

Hide file tree

Showing 138 changed files with 2,899 additions and 1,345 deletions.
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -14,21 +14,6 @@
         pass
 
 
-class Concat:
-    def setup(self):
-        N = 10 ** 5
-        self.s = pd.Series(list("aabbcd") * N).astype("category")
-
-        self.a = pd.Categorical(list("aabbcd") * N)
-        self.b = pd.Categorical(list("bbcdjk") * N)
-
-    def time_concat(self):
-        pd.concat([self.s, self.s])
-
-    def time_union(self):
-        union_categoricals([self.a, self.b])
-
-
 class Constructor:
     def setup(self):
         N = 10 ** 5
@@ -77,6 +62,33 @@ def time_existing_series(self):
         pd.Categorical(self.series)
 
 
+class CategoricalOps:
+    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
+    param_names = ["op"]
+
+    def setup(self, op):
+        N = 10 ** 5
+        self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
+
+    def time_categorical_op(self, op):
+        getattr(self.cat, op)("b")
+
+
+class Concat:
+    def setup(self):
+        N = 10 ** 5
+        self.s = pd.Series(list("aabbcd") * N).astype("category")
+
+        self.a = pd.Categorical(list("aabbcd") * N)
+        self.b = pd.Categorical(list("bbcdjk") * N)
+
+    def time_concat(self):
+        pd.concat([self.s, self.s])
+
+    def time_union(self):
+        union_categoricals([self.a, self.b])
+
+
 class ValueCounts:
 
     params = [True, False]

diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml
@@ -44,16 +44,13 @@ jobs:
           PATTERN: "not slow and not network"
           LOCALE_OVERRIDE: "zh_CN.UTF-8"
 
-        # https://github.com/pandas-dev/pandas/issues/29432
-        # py37_np_dev:
-        #   ENV_FILE: ci/deps/azure-37-numpydev.yaml
-        #   CONDA_PY: "37"
-        #   PATTERN: "not slow and not network"
-        #   TEST_ARGS: "-W error"
-        #   PANDAS_TESTING_MODE: "deprecate"
-        #   EXTRA_APT: "xsel"
-        #   # TODO:
-        #   continueOnError: true
+        py37_np_dev:
+          ENV_FILE: ci/deps/azure-37-numpydev.yaml
+          CONDA_PY: "37"
+          PATTERN: "not slow and not network"
+          TEST_ARGS: "-W error"
+          PANDAS_TESTING_MODE: "deprecate"
+          EXTRA_APT: "xsel"
 
   steps:
     - script: |

diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml
@@ -20,9 +20,9 @@ dependencies:
   - matplotlib=2.2.3
   - nomkl
   - numexpr
-  - numpy=1.13.3
+  - numpy=1.14
   - openpyxl
-  - pyarrow
+  - pyarrow>=0.12.0
   - pytables
   - python-dateutil==2.6.1
   - pytz

diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml
@@ -20,7 +20,7 @@ dependencies:
   - numexpr
   - numpy=1.15.*
   - openpyxl
-  - pyarrow
+  - pyarrow>=0.12.0
   - pytables
   - python-dateutil
   - pytz

diff --git a/doc/redirects.csv b/doc/redirects.csv
@@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
 generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
 generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
 generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
-generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
 generated/pandas.notna,../reference/api/pandas.notna
 generated/pandas.notnull,../reference/api/pandas.notnull
 generated/pandas.option_context,../reference/api/pandas.option_context

diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst
@@ -19,3 +19,4 @@ Development
     developer
     policies
     roadmap
+    meeting
diff --git a/doc/source/development/meeting.rst b/doc/source/development/meeting.rst
@@ -0,0 +1,32 @@
+.. _meeting:
+
+==================
+Developer Meetings
+==================
+
+We hold regular developer meetings on the second Wednesday
+of each month at 18:00 UTC. These meetings and their minutes are open to
+the public. All are welcome to join.
+
+Minutes
+-------
+
+The minutes of past meetings are available in `this Google Document <https://docs.google.com/document/d/1tGbTiYORHiSPgVMXawiweGJlBw5dOkVJLY-licoBmBU/edit?usp=sharing>`__.
+
+Calendar
+--------
+
+This calendar shows all the developer meetings.
+
+.. raw:: html
+
+   <iframe src="https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com" style="border: 0" width="800" height="600" frameborder="0" scrolling="no"></iframe>
+
+You can subscribe to this calendar with the following links:
+
+* `iCal <https://calendar.google.com/calendar/ical/pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com/public/basic.ics>`__
+* `Google calendar <https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com>`__
+
+Additionally, we'll sometimes have one-off meetings on specific topics.
+These will be published on the same calendar.
+
diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
@@ -1950,6 +1950,7 @@ sparse              :class:`SparseDtype`      (none)             :class:`arrays.
 intervals           :class:`IntervalDtype`    :class:`Interval`  :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
 nullable integer    :class:`Int64Dtype`, ...  (none)             :class:`arrays.IntegerArray`  :ref:`integer_na`
 Strings             :class:`StringDtype`      :class:`str`       :class:`arrays.StringArray`   :ref:`text`
+Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`      :class:`arrays.BooleanArray`  :ref:`api.arrays.bool`
 =================== ========================= ================== ============================= =============================
 
 Pandas has two ways to store strings.

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -258,7 +258,7 @@ matplotlib                2.2.2              Visualization
 openpyxl                  2.4.8              Reading / writing for xlsx files
 pandas-gbq                0.8.0              Google Big Query access
 psycopg2                                     PostgreSQL engine for sqlalchemy
-pyarrow                   0.9.0              Parquet and feather reading / writing
+pyarrow                   0.12.0             Parquet and feather reading / writing
 pymysql                   0.7.11             MySQL engine for sqlalchemy
 pyreadstat                                   SPSS files (.sav) reading
 pytables                  3.4.2              HDF5 reading / writing

diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst
@@ -25,6 +25,7 @@ Nullable Integer    :class:`Int64Dtype`, ...  (none)             :ref:`api.array
 Categorical         :class:`CategoricalDtype` (none)             :ref:`api.arrays.categorical`
 Sparse              :class:`SparseDtype`      (none)             :ref:`api.arrays.sparse`
 Strings             :class:`StringDtype`      :class:`str`       :ref:`api.arrays.string`
+Boolean (with NA)   :class:`BooleanDtype`     :class:`bool`      :ref:`api.arrays.bool`
 =================== ========================= ================== =============================
 
 Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
@@ -485,6 +486,28 @@ The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arra
 See :ref:`api.series.str` for more.
 
 
+.. _api.arrays.bool:
+
+Boolean data with missing values
+--------------------------------
+
+The boolean dtype (with the alias ``"boolean"``) provides support for storing
+boolean data (True, False values) with missing values, which is not possible
+with a bool :class:`numpy.ndarray`.
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   arrays.BooleanArray
+
+.. autosummary::
+   :toctree: api/
+   :template: autosummary/class_without_autosummary.rst
+
+   BooleanDtype
+
+
 .. Dtype attributes which are manually listed in their docstrings: including
 .. it here to make sure a docstring page is built for them
 

diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
@@ -305,7 +305,6 @@ MultiIndex components
 
    MultiIndex.set_levels
    MultiIndex.set_codes
-   MultiIndex.to_hierarchical
    MultiIndex.to_flat_index
    MultiIndex.to_frame
    MultiIndex.is_lexsorted

diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -41,6 +41,7 @@ Style application
    Styler.set_caption
    Styler.set_properties
    Styler.set_uuid
+   Styler.set_na_rep
    Styler.clear
    Styler.pipe
 

diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
@@ -93,9 +93,9 @@ Use efficient datatypes
 -----------------------
 
 The default pandas data types are not the most memory efficient. This is
-especially true for high-cardinality text data (columns with relatively few
-unique values). By using more efficient data types you can store larger datasets
-in memory.
+especially true for text data columns with relatively few unique values (commonly
+referred to as "low-cardinality" data). By using more efficient data types you
+can store larger datasets in memory.
 
 .. ipython:: python
 

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -67,6 +67,7 @@
     "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
     "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
     "               axis=1)\n",
+    "df.iloc[3, 3] = np.nan\n",
     "df.iloc[0, 2] = np.nan"
    ]
   },
@@ -402,6 +403,38 @@
     "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can format the text displayed for missing values by `na_rep`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.format(\"{:.2%}\", na_rep=\"-\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These formatting techniques can be used in combination with styling."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.highlight_max().format(None, na_rep=\"-\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -659,6 +692,7 @@
     "- precision\n",
     "- captions\n",
     "- table-wide styles\n",
+    "- missing values representation\n",
     "- hiding the index or columns\n",
     "\n",
     "Each of these can be specified in two ways:\n",
@@ -800,6 +834,32 @@
     "We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can control the default missing values representation for the entire table through `set_na_rep` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(df.style\n",
+    "   .set_na_rep(\"FAIL\")\n",
+    "   .format(None, na_rep=\"PASS\", subset=[\"D\"])\n",
+    "   .highlight_null(\"yellow\"))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},