Skip to content

Commit

Permalink
Changed x.__class__ to type(x)
Browse files Browse the repository at this point in the history
  • Loading branch information
MomIsBestFriend committed Nov 26, 2019
2 parents 61cf55d + 853ec9a commit 6aa8ced
Show file tree
Hide file tree
Showing 109 changed files with 2,803 additions and 1,243 deletions.
42 changes: 27 additions & 15 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,6 @@
pass


class Concat:
    """Benchmarks for concatenating categorical Series and unioning Categoricals."""

    def setup(self):
        """Build one categorical Series and two Categoricals of repeated letters."""
        repeats = 10 ** 5
        first_letters = list("aabbcd") * repeats
        second_letters = list("bbcdjk") * repeats

        self.s = pd.Series(first_letters).astype("category")

        # ``a`` and ``b`` share the letters b, c and d but not a, j and k.
        self.a = pd.Categorical(first_letters)
        self.b = pd.Categorical(second_letters)

    def time_union(self):
        """Time ``union_categoricals`` on the two Categoricals."""
        operands = [self.a, self.b]
        union_categoricals(operands)

    def time_concat(self):
        """Time ``pd.concat`` of the categorical Series with itself."""
        pair = [self.s, self.s]
        pd.concat(pair)


class Constructor:
def setup(self):
N = 10 ** 5
Expand Down Expand Up @@ -77,6 +62,33 @@ def time_existing_series(self):
pd.Categorical(self.series)


class CategoricalOps:
    """Benchmark comparison dunder methods on an ordered ``pd.Categorical``."""

    # Each entry names a comparison method that is looked up on the
    # Categorical and called with the scalar "b".
    param_names = ["op"]
    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]

    def setup(self, op):
        """Create the ordered Categorical; ``op`` is not used during setup."""
        repeats = 10 ** 5
        letters = list("aabbcd") * repeats
        self.cat = pd.Categorical(letters, ordered=True)

    def time_categorical_op(self, op):
        """Time the comparison method named by ``op`` against the scalar "b"."""
        compare = getattr(self.cat, op)
        compare("b")


class Concat:
    """Benchmarks for concatenating categorical Series and unioning Categoricals."""

    def setup(self):
        """Build one categorical Series and two Categoricals of repeated letters."""
        repeats = 10 ** 5
        first_letters = list("aabbcd") * repeats
        second_letters = list("bbcdjk") * repeats

        self.s = pd.Series(first_letters).astype("category")

        # ``a`` and ``b`` share the letters b, c and d but not a, j and k.
        self.a = pd.Categorical(first_letters)
        self.b = pd.Categorical(second_letters)

    def time_union(self):
        """Time ``union_categoricals`` on the two Categoricals."""
        operands = [self.a, self.b]
        union_categoricals(operands)

    def time_concat(self):
        """Time ``pd.concat`` of the categorical Series with itself."""
        pair = [self.s, self.s]
        pd.concat(pair)


class ValueCounts:

params = [True, False]
Expand Down
17 changes: 7 additions & 10 deletions ci/azure/posix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,13 @@ jobs:
PATTERN: "not slow and not network"
LOCALE_OVERRIDE: "zh_CN.UTF-8"

# https://github.com/pandas-dev/pandas/issues/29432
# py37_np_dev:
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
# CONDA_PY: "37"
# PATTERN: "not slow and not network"
# TEST_ARGS: "-W error"
# PANDAS_TESTING_MODE: "deprecate"
# EXTRA_APT: "xsel"
# # TODO:
# continueOnError: true
py37_np_dev:
ENV_FILE: ci/deps/azure-37-numpydev.yaml
CONDA_PY: "37"
PATTERN: "not slow and not network"
TEST_ARGS: "-W error"
PANDAS_TESTING_MODE: "deprecate"
EXTRA_APT: "xsel"

steps:
- script: |
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/azure-macos-36.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ dependencies:
- matplotlib=2.2.3
- nomkl
- numexpr
- numpy=1.13.3
- numpy=1.14
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil==2.6.1
- pytz
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-36.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies:
- numexpr
- numpy=1.15.*
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil
- pytz
Expand Down
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
generated/pandas.notna,../reference/api/pandas.notna
generated/pandas.notnull,../reference/api/pandas.notnull
generated/pandas.option_context,../reference/api/pandas.option_context
Expand Down
1 change: 1 addition & 0 deletions doc/source/development/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ Development
developer
policies
roadmap
meeting
32 changes: 32 additions & 0 deletions doc/source/development/meeting.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
.. _meeting:

==================
Developer Meetings
==================

We hold regular developer meetings on the second Wednesday
of each month at 18:00 UTC. These meetings and their minutes are open to
the public. All are welcome to join.

Minutes
-------

The minutes of past meetings are available in `this Google Document <https://docs.google.com/document/d/1tGbTiYORHiSPgVMXawiweGJlBw5dOkVJLY-licoBmBU/edit?usp=sharing>`__.

Calendar
--------

This calendar shows all the developer meetings.

.. raw:: html

<iframe src="https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com" style="border: 0" width="800" height="600" frameborder="0" scrolling="no"></iframe>

You can subscribe to this calendar with the following links:

* `iCal <https://calendar.google.com/calendar/ical/pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com/public/basic.ics>`__
* `Google calendar <https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com>`__

Additionally, we'll sometimes have one-off meetings on specific topics.
These will be published on the same calendar.

1 change: 1 addition & 0 deletions doc/source/getting_started/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,7 @@ sparse :class:`SparseDtype` (none) :class:`arrays.
intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na`
Strings :class:`StringDtype` :class:`str` :class:`arrays.StringArray` :ref:`text`
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :class:`arrays.BooleanArray` :ref:`api.arrays.bool`
=================== ========================= ================== ============================= =============================

Pandas has two ways to store strings.
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
openpyxl 2.4.8 Reading / writing for xlsx files
pandas-gbq 0.8.0 Google Big Query access
psycopg2 PostgreSQL engine for sqlalchemy
pyarrow 0.9.0 Parquet and feather reading / writing
pyarrow 0.12.0 Parquet and feather reading / writing
pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
Expand Down
23 changes: 23 additions & 0 deletions doc/source/reference/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.array
Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical`
Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse`
Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string`
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool`
=================== ========================= ================== =============================

Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
Expand Down Expand Up @@ -485,6 +486,28 @@ The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arra
See :ref:`api.series.str` for more.


.. _api.arrays.bool:

Boolean data with missing values
--------------------------------

The boolean dtype (with the alias ``"boolean"``) provides support for storing
boolean data (True, False values) with missing values, which is not possible
with a bool :class:`numpy.ndarray`.

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

arrays.BooleanArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

BooleanDtype


.. Dtype attributes which are manually listed in their docstrings: including
.. it here to make sure a docstring page is built for them
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ MultiIndex components

MultiIndex.set_levels
MultiIndex.set_codes
MultiIndex.to_hierarchical
MultiIndex.to_flat_index
MultiIndex.to_frame
MultiIndex.is_lexsorted
Expand Down
1 change: 1 addition & 0 deletions doc/source/reference/style.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Style application
Styler.set_caption
Styler.set_properties
Styler.set_uuid
Styler.set_na_rep
Styler.clear
Styler.pipe

Expand Down
6 changes: 3 additions & 3 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ Use efficient datatypes
-----------------------

The default pandas data types are not the most memory efficient. This is
especially true for high-cardinality text data (columns with relatively few
unique values). By using more efficient data types you can store larger datasets
in memory.
especially true for text data columns with relatively few unique values (commonly
referred to as "low-cardinality" data). By using more efficient data types you
can store larger datasets in memory.

.. ipython:: python
Expand Down
60 changes: 60 additions & 0 deletions doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
"df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
" axis=1)\n",
"df.iloc[3, 3] = np.nan\n",
"df.iloc[0, 2] = np.nan"
]
},
Expand Down Expand Up @@ -402,6 +403,38 @@
"df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can format the text displayed for missing values using `na_rep`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.style.format(\"{:.2%}\", na_rep=\"-\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"These formatting techniques can be used in combination with styling."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.style.highlight_max().format(None, na_rep=\"-\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -659,6 +692,7 @@
"- precision\n",
"- captions\n",
"- table-wide styles\n",
"- missing values representation\n",
"- hiding the index or columns\n",
"\n",
"Each of these can be specified in two ways:\n",
Expand Down Expand Up @@ -800,6 +834,32 @@
"We hope to collect some useful ones either in pandas, or preferably in a new package that [builds on top](#Extensibility) of the tools here."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Missing values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can control the default missing values representation for the entire table through the `set_na_rep` method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"(df.style\n",
" .set_na_rep(\"FAIL\")\n",
" .format(None, na_rep=\"PASS\", subset=[\"D\"])\n",
" .highlight_null(\"yellow\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
Loading

0 comments on commit 6aa8ced

Please sign in to comment.