From 700e83fac9edd3c22aaf1b3781012a28850fcc9f Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 8 Jan 2021 10:12:29 -0500 Subject: [PATCH 01/12] wip --- .../python/plotly/plotly/express/__init__.py | 4 + .../plotly/plotly/express/_chart_types.py | 115 ++++++++++++++++++ .../python/plotly/plotly/express/_core.py | 38 ++++-- packages/python/plotly/plotly/express/_doc.py | 9 +- .../test_optional/test_px/test_facets.py | 17 +-- .../test_optional/test_px/test_marginals.py | 2 +- 6 files changed, 161 insertions(+), 24 deletions(-) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index 8bc5da5391..efda9cbe71 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -34,6 +34,8 @@ box, strip, histogram, + ecdf, + kde, scatter_matrix, parallel_coordinates, parallel_categories, @@ -89,6 +91,8 @@ "box", "strip", "histogram", + "ecdf", + "kde", "choropleth", "choropleth_mapbox", "pie", diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index f335e78de3..78fadb987e 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -489,6 +489,121 @@ def histogram( ) +def ecdf( + data_frame=None, + x=None, + y=None, + color=None, + line_dash=None, + facet_row=None, + facet_col=None, + facet_col_wrap=0, + facet_row_spacing=None, + facet_col_spacing=None, + hover_name=None, + hover_data=None, + animation_frame=None, + animation_group=None, + category_orders=None, + labels=None, + color_discrete_sequence=None, + color_discrete_map=None, + line_dash_sequence=None, + line_dash_map=None, + marginal=None, + opacity=None, + orientation=None, + line_shape=None, + norm=None, # TODO use this + complementary=None, # TODO use this + log_x=False, + log_y=False, + range_x=None, + range_y=None, + title=None, + template=None, 
+ width=None, + height=None, +): + """ + In an Empirical Cumulative Distribution Function (ECDF) plot, rows of `data_frame` + are sorted by the value `x` (or `y` if `orientation` is `'h'`) and their cumulative + count (or the cumulative sum of `y` if supplied and `orientation` is `'v'`) is drawn + as a line. + """ + return make_figure(args=locals(), constructor=go.Scatter) + + +ecdf.__doc__ = make_docstring( + ecdf, + append_dict=dict( + x=[ + "If `orientation` is `'h'`, the cumulative sum of this argument is plotted rather than the cumulative count." + ] + + _wide_mode_xy_append, + y=[ + "If `orientation` is `'v'`, the cumulative sum of this argument is plotted rather than the cumulative count." + ] + + _wide_mode_xy_append, + ), +) + + +def kde( + data_frame=None, + x=None, + y=None, + color=None, + line_dash=None, + facet_row=None, + facet_col=None, + facet_col_wrap=0, + facet_row_spacing=None, + facet_col_spacing=None, + hover_name=None, + hover_data=None, + animation_frame=None, + animation_group=None, + category_orders=None, + labels=None, + color_discrete_sequence=None, + color_discrete_map=None, + line_dash_sequence=None, + line_dash_map=None, + marginal=None, + opacity=None, + orientation=None, + norm=None, # TODO use this + kernel=None, # TODO use this + bw_method=None, # TODO use this + bw_adjust=None, # TODO use this + log_x=False, + log_y=False, + range_x=None, + range_y=None, + title=None, + template=None, + width=None, + height=None, +): + """ + In a Kernel Density Estimation (KDE) plot, rows of `data_frame` + are used as inputs to a KDE smoothing function which is rendered as a line. 
+ """ + return make_figure(args=locals(), constructor=go.Scatter) + + +kde.__doc__ = make_docstring( + kde, + append_dict=dict( + x=["If `orientation` is `'h'`, this argument is used as KDE weights."] + + _wide_mode_xy_append, + y=["If `orientation` is `'v'`, this argument is used as KDE weights."] + + _wide_mode_xy_append, + ), +) + + def violin( data_frame=None, x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index cc0e98375b..3ec71138a8 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1312,6 +1312,9 @@ def build_dataframe(args, constructor): wide_cross_name = None # will likely be "index" in wide_mode value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] + hist1d_orientation = ( + constructor == go.Histogram or "complementary" in args or "kernel" in args + ) if constructor in cartesians: if wide_x and wide_y: raise ValueError( @@ -1346,7 +1349,7 @@ def build_dataframe(args, constructor): df_provided and var_name in df_input ): var_name = "variable" - if constructor == go.Histogram: + if hist1d_orientation: wide_orientation = "v" if wide_x else "h" else: wide_orientation = "v" if wide_y else "h" @@ -1360,7 +1363,10 @@ def build_dataframe(args, constructor): var_name = _escape_col_name(df_input, var_name, []) missing_bar_dim = None - if constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types: + if ( + constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types + and not hist1d_orientation + ): if not wide_mode and (no_x != no_y): for ax in ["x", "y"]: if args.get(ax) is None: @@ -1457,14 +1463,22 @@ def build_dataframe(args, constructor): df_output[var_name] = df_output[var_name].astype(str) orient_v = wide_orientation == "v" - if constructor in [go.Scatter, go.Funnel] + hist2d_types: + if hist1d_orientation: + args["x" if orient_v else "y"] = value_name + if 
wide_cross_name is None and constructor == go.Scatter: + args["y" if orient_v else "x"] = count_name + df_output[count_name] = 1 + else: + args["y" if orient_v else "x"] = wide_cross_name + args["color"] = args["color"] or var_name + elif constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = value_name if constructor != go.Histogram2d: args["color"] = args["color"] or var_name if "line_group" in args: args["line_group"] = args["line_group"] or var_name - if constructor == go.Bar: + elif constructor == go.Bar: if _is_continuous(df_output, value_name): args["x" if orient_v else "y"] = wide_cross_name args["y" if orient_v else "x"] = value_name @@ -1474,13 +1488,9 @@ def build_dataframe(args, constructor): args["y" if orient_v else "x"] = count_name df_output[count_name] = 1 args["color"] = args["color"] or var_name - if constructor in [go.Violin, go.Box]: + elif constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = wide_cross_name or var_name args["y" if orient_v else "x"] = value_name - if constructor == go.Histogram: - args["x" if orient_v else "y"] = value_name - args["y" if orient_v else "x"] = wide_cross_name - args["color"] = args["color"] or var_name if no_color: args["color"] = None args["data_frame"] = df_output @@ -1984,11 +1994,11 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): trace_spec != trace_specs[0] and ( trace_spec.constructor in [go.Violin, go.Box] - and m.variable in ["symbol", "pattern"] + and m.variable in ["symbol", "pattern", "dash"] ) or ( trace_spec.constructor in [go.Histogram] - and m.variable in ["symbol"] + and m.variable in ["symbol", "dash"] ) ): pass @@ -2047,6 +2057,12 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): ): trace.update(marker=dict(color=trace.line.color)) + if "complementary" in args: # ECDF + base = args["x"] if args["orientation"] == "v" else args["y"] + var = 
args["x"] if args["orientation"] == "h" else args["y"] + group = group.sort_values(by=base) + group[var] = group[var].cumsum() + patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref ) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 65d9f0588f..e09ea92b72 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -573,10 +573,17 @@ "Sets the number of rendered sectors from any given `level`. Set `maxdepth` to -1 to render all the" "levels in the hierarchy.", ], + norm=["TODO"], + complementary=["TODO"], + kernel=["TODO"], + bw_method=["TODO"], + bw_adjust=["TODO"], ) -def make_docstring(fn, override_dict={}, append_dict={}): +def make_docstring(fn, override_dict=None, append_dict=None): + override_dict = {} if override_dict is None else override_dict + append_dict = {} if append_dict is None else append_dict tw = TextWrapper(width=75, initial_indent=" ", subsequent_indent=" ") result = (fn.__doc__ or "") + "\nParameters\n----------\n" for param in getfullargspec(fn)[0]: diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_facets.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_facets.py index 6598599fb9..c1db2afe77 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_facets.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_facets.py @@ -1,4 +1,3 @@ -import plotly import pandas as pd import plotly.express as px from pytest import approx @@ -112,25 +111,21 @@ def bad_facet_spacing_df(): def test_bad_facet_spacing_eror(bad_facet_spacing_df): df = bad_facet_spacing_df with pytest.raises( - ValueError, match="Use the facet_row_spacing argument to adjust this spacing\." + ValueError, match="Use the facet_row_spacing argument to adjust this spacing." 
): - fig = px.scatter( - df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001 - ) + px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01001) with pytest.raises( - ValueError, match="Use the facet_col_spacing argument to adjust this spacing\." + ValueError, match="Use the facet_col_spacing argument to adjust this spacing." ): - fig = px.scatter( - df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001 - ) + px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01001) # Check error is not raised when the spacing is OK try: - fig = px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01) + px.scatter(df, x="x", y="y", facet_row="category", facet_row_spacing=0.01) except ValueError: # Error shouldn't be raised, so fail if it is assert False try: - fig = px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01) + px.scatter(df, x="x", y="y", facet_col="category", facet_col_spacing=0.01) except ValueError: # Error shouldn't be raised, so fail if it is assert False diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py index ecb7927d62..a9277986f0 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py @@ -14,7 +14,7 @@ def test_xy_marginals(px_fn, marginal_x, marginal_y): assert len(fig.data) == 1 + (marginal_x is not None) + (marginal_y is not None) -@pytest.mark.parametrize("px_fn", [px.histogram]) +@pytest.mark.parametrize("px_fn", [px.histogram, px.ecdf, px.kde]) @pytest.mark.parametrize("marginal", [None, "rug", "histogram", "box", "violin"]) @pytest.mark.parametrize("orientation", ["h", "v"]) def test_single_marginals(px_fn, marginal, orientation): From 538f18e5142ee82633acb7a64d9278122a9c737c Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: 
Sat, 9 Jan 2021 21:40:39 -0500 Subject: [PATCH 02/12] mostly finished ECDF --- .../plotly/plotly/express/_chart_types.py | 7 ++-- .../python/plotly/plotly/express/_core.py | 35 +++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 78fadb987e..ae338c909b 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -494,6 +494,7 @@ def ecdf( x=None, y=None, color=None, + text=None, line_dash=None, facet_row=None, facet_col=None, @@ -513,9 +514,9 @@ def ecdf( marginal=None, opacity=None, orientation=None, - line_shape=None, - norm=None, # TODO use this - complementary=None, # TODO use this + line_shape="hv", + norm="probability", + complementary=False, log_x=False, log_y=False, range_x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 3ec71138a8..a65dadd80a 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1465,11 +1465,7 @@ def build_dataframe(args, constructor): if hist1d_orientation: args["x" if orient_v else "y"] = value_name - if wide_cross_name is None and constructor == go.Scatter: - args["y" if orient_v else "x"] = count_name - df_output[count_name] = 1 - else: - args["y" if orient_v else "x"] = wide_cross_name + args["y" if orient_v else "x"] = wide_cross_name args["color"] = args["color"] or var_name elif constructor in [go.Scatter, go.Funnel] + hist2d_types: args["x" if orient_v else "y"] = wide_cross_name @@ -1491,6 +1487,21 @@ def build_dataframe(args, constructor): elif constructor in [go.Violin, go.Box]: args["x" if orient_v else "y"] = wide_cross_name or var_name args["y" if orient_v else "x"] = value_name + + if hist1d_orientation and constructor == go.Scatter: + if args["x"] is not None and args["y"] is 
not None: + args["histfunc"] = "sum" + elif args["x"] is None: + args["histfunc"] = None + args["orientation"] = "h" + args["x"] = count_name + df_output[count_name] = 1 + else: + args["histfunc"] = None + args["orientation"] = "v" + args["y"] = count_name + df_output[count_name] = 1 + if no_color: args["color"] = None args["data_frame"] = df_output @@ -1788,7 +1799,9 @@ def infer_config(args, constructor, trace_patch, layout_patch): trace_patch["opacity"] = args["opacity"] else: trace_patch["marker"] = dict(opacity=args["opacity"]) - if "line_group" in args: # px.line, px.line_*, px.area + if ( + "line_group" in args or "line_dash" in args + ): # px.line, px.line_*, px.area, px.ecdf, px, kde modes = set(["lines"]) if args.get("text") or args.get("symbol") or args.get("markers"): modes.add("markers") @@ -2061,7 +2074,17 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): base = args["x"] if args["orientation"] == "v" else args["y"] var = args["x"] if args["orientation"] == "h" else args["y"] group = group.sort_values(by=base) + group_sum = group[var].sum() group[var] = group[var].cumsum() + if args["complementary"]: + group[var] = group_sum - group[var] + + if args["norm"] == "probability": + group[var] = group[var] / group_sum + elif args["norm"] == "percent": + group[var] = 100.0 * group[var] / group_sum + args["histnorm"] = args["norm"] + # TODO norm, including histnorm-like naming patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref From 807b8283af1aee9f9c1aa9d5d68d84d7c81b9641 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Mon, 11 Jan 2021 10:02:16 -0500 Subject: [PATCH 03/12] wip --- packages/python/plotly/plotly/express/_core.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index a65dadd80a..2239c20343 100644 --- a/packages/python/plotly/plotly/express/_core.py 
+++ b/packages/python/plotly/plotly/express/_core.py @@ -1856,6 +1856,14 @@ def infer_config(args, constructor, trace_patch, layout_patch): if "trendline_options" in args and args["trendline_options"] is None: args["trendline_options"] = dict() + if "norm" in args: + if args.get("norm", None) not in [None, "percent", "probability"]: + raise ValueError( + "`norm` must be one of None, 'percent' or 'probability'. " + + "'%s' was provided." % args["norm"] + ) + args["histnorm"] = args["norm"] + # Compute applicable grouping attributes for k in group_attrables: if k in args: @@ -2083,8 +2091,6 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): group[var] = group[var] / group_sum elif args["norm"] == "percent": group[var] = 100.0 * group[var] / group_sum - args["histnorm"] = args["norm"] - # TODO norm, including histnorm-like naming patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref From 5f0ac8b95d2cf186a63ec902d959adfe0d26ef45 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 5 Aug 2021 21:19:23 -0400 Subject: [PATCH 04/12] finishing up ECDF --- .../plotly/plotly/express/_chart_types.py | 11 +++-- .../python/plotly/plotly/express/_core.py | 40 +++++++++++++------ packages/python/plotly/plotly/express/_doc.py | 29 +++++++++++--- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index ae338c909b..38792a15e6 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -496,6 +496,7 @@ def ecdf( color=None, text=None, line_dash=None, + symbol=None, facet_row=None, facet_col=None, facet_col_wrap=0, @@ -505,18 +506,22 @@ def ecdf( hover_data=None, animation_frame=None, animation_group=None, + markers=False, + lines=True, category_orders=None, labels=None, color_discrete_sequence=None, color_discrete_map=None, 
line_dash_sequence=None, line_dash_map=None, + symbol_sequence=None, + symbol_map=None, marginal=None, opacity=None, orientation=None, line_shape="hv", - norm="probability", - complementary=False, + ecdfnorm="probability", + ecdfmode="standard", log_x=False, log_y=False, range_x=None, @@ -574,7 +579,7 @@ def kde( marginal=None, opacity=None, orientation=None, - norm=None, # TODO use this + kdenorm=None, # TODO use this kernel=None, # TODO use this bw_method=None, # TODO use this bw_adjust=None, # TODO use this diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 2239c20343..770fc5dcef 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -660,6 +660,12 @@ def configure_cartesian_axes(args, fig, orders): if "is_timeline" in args: fig.update_xaxes(type="date") + if "ecdfmode" in args: + if args["orientation"] == "v": + fig.update_yaxes(rangemode="tozero") + else: + fig.update_xaxes(rangemode="tozero") + def configure_ternary_axes(args, fig, orders): fig.update_ternaries( @@ -1313,7 +1319,7 @@ def build_dataframe(args, constructor): value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] hist1d_orientation = ( - constructor == go.Histogram or "complementary" in args or "kernel" in args + constructor == go.Histogram or "ecdfmode" in args or "kernel" in args ) if constructor in cartesians: if wide_x and wide_y: @@ -1802,11 +1808,15 @@ def infer_config(args, constructor, trace_patch, layout_patch): if ( "line_group" in args or "line_dash" in args ): # px.line, px.line_*, px.area, px.ecdf, px, kde - modes = set(["lines"]) + modes = set() + if args.get("lines", True): + modes.add("lines") if args.get("text") or args.get("symbol") or args.get("markers"): modes.add("markers") if args.get("text"): modes.add("text") + if len(modes) == 0: + modes.add("lines") trace_patch["mode"] = "+".join(modes) elif 
constructor != go.Splom and ( "symbol" in args or constructor == go.Scattermapbox @@ -1856,13 +1866,13 @@ def infer_config(args, constructor, trace_patch, layout_patch): if "trendline_options" in args and args["trendline_options"] is None: args["trendline_options"] = dict() - if "norm" in args: - if args.get("norm", None) not in [None, "percent", "probability"]: + if "ecdfnorm" in args: + if args.get("ecdfnorm", None) not in [None, "percent", "probability"]: raise ValueError( - "`norm` must be one of None, 'percent' or 'probability'. " - + "'%s' was provided." % args["norm"] + "`ecdfnorm` must be one of None, 'percent' or 'probability'. " + + "'%s' was provided." % args["ecdfnorm"] ) - args["histnorm"] = args["norm"] + args["histnorm"] = args["ecdfnorm"] # Compute applicable grouping attributes for k in group_attrables: @@ -2078,18 +2088,22 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): ): trace.update(marker=dict(color=trace.line.color)) - if "complementary" in args: # ECDF + if "ecdfmode" in args: base = args["x"] if args["orientation"] == "v" else args["y"] var = args["x"] if args["orientation"] == "h" else args["y"] - group = group.sort_values(by=base) - group_sum = group[var].sum() + ascending = args.get("ecdfmode", "standard") != "reversed" + group = group.sort_values(by=base, ascending=ascending) + group_sum = group[var].sum() # compute here before next line mutates group[var] = group[var].cumsum() - if args["complementary"]: + if not ascending: + group = group.sort_values(by=base, ascending=True) + + if args.get("ecdfmode", "standard") == "complementary": group[var] = group_sum - group[var] - if args["norm"] == "probability": + if args["ecdfnorm"] == "probability": group[var] = group[var] / group_sum - elif args["norm"] == "percent": + elif args["ecdfnorm"] == "percent": group[var] = 100.0 * group[var] / group_sum patch, fit_results = make_trace_kwargs( diff --git a/packages/python/plotly/plotly/express/_doc.py 
b/packages/python/plotly/plotly/express/_doc.py index e09ea92b72..39b635562e 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -325,7 +325,11 @@ "Setting this value is recommended when using `plotly.express.colors.diverging` color scales as the inputs to `color_continuous_scale`.", ], size_max=["int (default `20`)", "Set the maximum mark size when using `size`."], - markers=["boolean (default `False`)", "If `True`, markers are shown on lines.",], + markers=["boolean (default `False`)", "If `True`, markers are shown on lines."], + lines=[ + "boolean (default `True`)", + "If `False`, lines are not drawn (forced to `True` if `markers` is `False`).", + ], log_x=[ "boolean (default `False`)", "If `True`, the x-axis is log-scaled in cartesian coordinates.", @@ -573,11 +577,24 @@ "Sets the number of rendered sectors from any given `level`. Set `maxdepth` to -1 to render all the" "levels in the hierarchy.", ], - norm=["TODO"], - complementary=["TODO"], - kernel=["TODO"], - bw_method=["TODO"], - bw_adjust=["TODO"], + ecdfnorm=[ + "string or `None` (default `'probability'`)", + "One of `'probability'` or `'percent'`", + "If `None`, values will be raw counts or sums.", + "If `'probability'`, values will be probabilities normalized from 0 to 1.", + "If `'percent'`, values will be percentages normalized from 0 to 100.", + ], + ecdfmode=[ + "string (default `'standard'`)", + "One of `'standard'`, `'complementary'` or `'reversed'`", + "If `'standard'`, the ECDF is plotted such that values represent data at or below the point.", + "If `'complementary'`, the CCDF is plotted such that values represent data above the point.", + "If `'reversed'`, a variant of the CCDF is plotted such that values represent data at or above the point.", + ], + kernel=["TODO"], # kde + kdenorm=["TODO"], # kde + bw_method=["TODO"], # kde + bw_adjust=["TODO"], # kde ) From dfe621603870e1e26ce3df4d458165f6fd18012c Mon Sep 17 00:00:00 2001 From: 
Nicolas Kruchten Date: Thu, 5 Aug 2021 22:27:53 -0400 Subject: [PATCH 05/12] primitive KDE implementation, matches though --- .../python/plotly/plotly/express/_chart_types.py | 3 --- packages/python/plotly/plotly/express/_core.py | 15 ++++++++++++++- packages/python/plotly/plotly/express/_doc.py | 9 +++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 38792a15e6..b40bfe1c9c 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -579,10 +579,7 @@ def kde( marginal=None, opacity=None, orientation=None, - kdenorm=None, # TODO use this - kernel=None, # TODO use this bw_method=None, # TODO use this - bw_adjust=None, # TODO use this log_x=False, log_y=False, range_x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 770fc5dcef..c06e35256b 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1319,7 +1319,7 @@ def build_dataframe(args, constructor): value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] hist1d_orientation = ( - constructor == go.Histogram or "ecdfmode" in args or "kernel" in args + constructor == go.Histogram or "ecdfmode" in args or "bw_method" in args ) if constructor in cartesians: if wide_x and wide_y: @@ -2106,6 +2106,19 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): elif args["ecdfnorm"] == "percent": group[var] = 100.0 * group[var] / group_sum + if "bw_method" in args: + from scipy.stats import gaussian_kde + + base = args["x"] if args["orientation"] == "v" else args["y"] + var = args["x"] if args["orientation"] == "h" else args["y"] + bw = args.get("bw_method") + group = group.sort_values(by=base) + + kernel = gaussian_kde( + 
dataset=group[base], weights=group[var], bw_method=bw + ) + group[var] = kernel.evaluate(group[base]) + patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref ) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index 39b635562e..a961096338 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -591,10 +591,11 @@ "If `'complementary'`, the CCDF is plotted such that values represent data above the point.", "If `'reversed'`, a variant of the CCDF is plotted such that values represent data at or above the point.", ], - kernel=["TODO"], # kde - kdenorm=["TODO"], # kde - bw_method=["TODO"], # kde - bw_adjust=["TODO"], # kde + bw_method=[ + "str, scalar or callable (default `'scott'`)", + "If str, must be one of `'scott'` or `'silverman'`.", + "Passed to `scipy.stats.gaussian_kde`.", + ], ) From 9df2302c43f451c784d028cf51cd5532ffeb59c9 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 6 Aug 2021 07:05:03 -0400 Subject: [PATCH 06/12] wip --- packages/python/plotly/plotly/express/_chart_types.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index b40bfe1c9c..55492b16f3 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -522,6 +522,7 @@ def ecdf( line_shape="hv", ecdfnorm="probability", ecdfmode="standard", + render_mode="auto", log_x=False, log_y=False, range_x=None, @@ -579,7 +580,8 @@ def kde( marginal=None, opacity=None, orientation=None, - bw_method=None, # TODO use this + bw_method=None, + render_mode="auto", log_x=False, log_y=False, range_x=None, @@ -590,8 +592,8 @@ def kde( height=None, ): """ - In a Kernel Density Estimation (KDE) plot, rows of `data_frame` - are used as inputs to a KDE 
smoothing function which is rendered as a line. + In a Kernel Density Estimation (KDE) plot, rows of `data_frame` are used as inputs + to a KDE smoothing function and a line is drawn with one point per row of input. + """ + return make_figure(args=locals(), constructor=go.Scatter) From 9fd2f6be63552e64a80df3c5fd9fc9403d8d0c75 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Fri, 6 Aug 2021 07:12:53 -0400 Subject: [PATCH 07/12] kde labelling --- packages/python/plotly/plotly/express/_core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index c06e35256b..cd94976cd9 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1874,6 +1874,9 @@ def infer_config(args, constructor, trace_patch, layout_patch): ) args["histnorm"] = args["ecdfnorm"] + if "bw_method" in args: + args["histnorm"] = "density" + # Compute applicable grouping attributes for k in group_attrables: if k in args: From d4a733c9de3a5c36434e9165251ecd8783cac8b1 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Wed, 11 Aug 2021 08:55:20 -0400 Subject: [PATCH 08/12] work out line shape for ecdfmode --- packages/python/plotly/plotly/express/_chart_types.py | 1 - packages/python/plotly/plotly/express/_core.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 55492b16f3..490a00dafe 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -519,7 +519,6 @@ def ecdf( marginal=None, opacity=None, orientation=None, - line_shape="hv", ecdfnorm="probability", ecdfmode="standard", render_mode="auto", diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index cd94976cd9..0a20f6abc7 100644 --- 
a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1825,6 +1825,10 @@ def infer_config(args, constructor, trace_patch, layout_patch): if "line_shape" in args: trace_patch["line"] = dict(shape=args["line_shape"]) + elif "ecdfmode" in args: + trace_patch["line"] = dict( + shape="vh" if args["ecdfmode"] == "reversed" else "hv" + ) if "geojson" in args: trace_patch["featureidkey"] = args["featureidkey"] From b3c6c5dbb01c5de0f471af88666271b2b3397680 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 12 Aug 2021 15:17:38 -0400 Subject: [PATCH 09/12] wip --- doc/python/ecdf-plots.md | 145 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 doc/python/ecdf-plots.md diff --git a/doc/python/ecdf-plots.md b/doc/python/ecdf-plots.md new file mode 100644 index 0000000000..de9741cc5c --- /dev/null +++ b/doc/python/ecdf-plots.md @@ -0,0 +1,145 @@ +--- +jupyter: + jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: markdown + format_version: '1.2' + jupytext_version: 1.4.2 + kernelspec: + display_name: Python 3 + language: python + name: python3 + language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.7.7 + plotly: + description: How to add empirical cumulative distribution function (ECDF) plots. + display_as: statistical + language: python + layout: base + name: Empirical Cumulative Distribution Plots + order: 16 + page_type: u-guide + permalink: python/ecdf-plots/ + thumbnail: thumbnail/figure-labels.png +--- + +### Overview + +[Empirical cumulative distribution function plots](https://en.wikipedia.org/wiki/Empirical_distribution_function) are a way to visualize the distribution of a variable, and Plotly Express has a built-in function, `px.ecdf()`, to generate such plots. 
[Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). + +### Simple ECDF Plots + +Providing a single column to the `x` variable yields a basic ECDF plot. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill") +fig.show() +``` + +Providing multiple columns leverages Plotly Express' [wide-form data support](https://plotly.com/python/wide-form/) to show multiple variables on the same plot. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x=["total_bill", "tip"]) +fig.show() +``` + +It is also possible to map another variable to the color dimension of a plot. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex") +fig.show() +``` + +### Configuring the Y axis + +By default, the Y axis shows probability, but it is also possible to show raw counts by setting the `ecdfnorm` argument to `None` or to show percentages by setting it to `percent`. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", ecdfnorm=None) +fig.show() +``` + +If a `y` value is provided, the Y axis is set to the sum of `y` rather than counts. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", y="tip", color="sex", ecdfnorm=None) +fig.show() +``` + +### Reversed and Complementary CDF plots + +By default, the Y value represents the fraction of the data that is *at or below* the value on the X axis. Setting `ecdfmode` to `"reversed"` reverses this, with the Y axis representing the fraction of the data *at or above* the X value. Setting `ecdfmode` to `"complementary"` plots `1-ECDF`, meaning that the Y values represent the fraction of the data *above* the X value. 
+ +```python +import plotly.express as px +fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="standard", + title="ecdfmode='standard' (at or below X value, the default)") +fig.show() +``` + +```python +import plotly.express as px +fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="reversed", + title="ecdfmode='reversed' (at or above X value)") +fig.show() +``` + +```python +import plotly.express as px +fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="complementary", + title="ecdfmode='complementary' (above X value)") +fig.show() +``` + +### Orientation + +By default, plots are oriented vertically (i.e. the variable is on the X axis and counted/summed upwards), but this can be overridden with the `orientation` argument. + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", y="tip", color="sex", ecdfnorm=None, orientation="h") +fig.show() +``` + +### Markers and/or Lines + +ECDF Plots can be configured to show lines and/or markers. 
+ +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", markers=True) +fig.show() +``` + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", markers=True, lines=False) +fig.show() +``` + +```python + +``` From 542ed5aff78217b60b6d16ed6cc687d6d626b9bc Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 12 Aug 2021 21:41:53 -0400 Subject: [PATCH 10/12] docs sweep --- doc/python/box-plots.md | 11 +++-- doc/python/ecdf-plots.md | 45 +++++++++++++++++-- doc/python/graph-objects.md | 24 +++++++++-- doc/python/histograms.md | 13 ++++-- doc/python/line-and-scatter.md | 56 ++++++++++++++++++++++++ doc/python/plotly-express.md | 79 ++++++++++++++++++++++++---------- doc/python/violin.md | 11 +++-- 7 files changed, 198 insertions(+), 41 deletions(-) diff --git a/doc/python/box-plots.md b/doc/python/box-plots.md index e113cb9738..bae88a224b 100644 --- a/doc/python/box-plots.md +++ b/doc/python/box-plots.md @@ -6,7 +6,7 @@ jupyter: extension: .md format_name: markdown format_version: '1.2' - jupytext_version: 1.6.0 + jupytext_version: 1.4.2 kernelspec: display_name: Python 3 language: python @@ -20,7 +20,7 @@ jupyter: name: python nbconvert_exporter: python pygments_lexer: ipython3 - version: 3.7.6 + version: 3.7.7 plotly: description: How to make Box Plots in Python with Plotly. display_as: statistical @@ -36,13 +36,18 @@ jupyter: thumbnail: thumbnail/box.jpg --- -A [box plot](https://en.wikipedia.org/wiki/Box_plot) is a statistical representation of numerical data through their quartiles. The ends of the box represent the lower and upper quartiles, while the median (second quartile) is marked by a line inside the box. For other statistical representations of numerical data, see [other statistical charts](https://plotly.com/python/statistical-charts/). 
+ +A [box plot](https://en.wikipedia.org/wiki/Box_plot) is a statistical representation of the distribution of a variable through its quartiles. The ends of the box represent the lower and upper quartiles, while the median (second quartile) is marked by a line inside the box. For other statistical representations of numerical data, see [other statistical charts](https://plotly.com/python/statistical-charts/). + + +Alternatives to box plots for visualizing distributions include [histograms](https://plotly.com/python/histograms/), [violin plots](https://plotly.com/python/violin/), [ECDF plots](https://plotly.com/python/ecdf-plots/) and [strip charts](https://plotly.com/python/strip-charts/). ## Box Plot with `plotly.express` [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). In a box plot created by `px.box`, the distribution of the column given as `y` argument is represented. + ```python import plotly.express as px diff --git a/doc/python/ecdf-plots.md b/doc/python/ecdf-plots.md index de9741cc5c..acc9a532b4 100644 --- a/doc/python/ecdf-plots.md +++ b/doc/python/ecdf-plots.md @@ -35,7 +35,9 @@ jupyter: ### Overview -[Empirical cumulative distribution function plots](https://en.wikipedia.org/wiki/Empirical_distribution_function) are a way to visualize the distribution of a variabl, and Plotly Express has a built-in function, `px.ecdf()` to generate such plots. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). 
+[Empirical cumulative distribution function plots](https://en.wikipedia.org/wiki/Empirical_distribution_function) are a way to visualize the distribution of a variable, and Plotly Express has a built-in function, `px.ecdf()` to generate such plots. [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). + +Alternatives to ECDF plots for visualizing distributions include [histograms](https://plotly.com/python/histograms/), [violin plots](https://plotly.com/python/violin/), [box plots](https://plotly.com/python/box-plots/) and [strip charts](https://plotly.com/python/strip-charts/). ### Simple ECDF Plots @@ -90,24 +92,30 @@ fig.show() By default, the Y value represents the fraction of the data that is *at or below* the value on on the X axis. Setting `ecdfmode` to `"reversed"` reverses this, with the Y axis representing the fraction of the data *at or above* the X value. Setting `ecdfmode` to `"complementary"` plots `1-ECDF`, meaning that the Y values represent the fraction of the data *above* the X value. +In `standard` mode (the default), the right-most point is at 1 (or the total count/sum, depending on `ecdfnorm`) and the left-most point is above 0. + ```python import plotly.express as px fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="standard", - title="ecdfmode='standard' (at or below X value, the default)") + title="ecdfmode='standard' (Y=fraction at or below X value, this the default)") fig.show() ``` +In `reversed` mode, the left-most point is at 1 (or the total count/sum, depending on `ecdfnorm`) and the right-most point is above 0. 
+ ```python import plotly.express as px fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="reversed", - title="ecdfmode='reversed' (at or above X value)") + title="ecdfmode='reversed' (Y=fraction at or above X value)") fig.show() ``` +In `complementary` mode, the right-most point is at 0 and no points are at 1 (or the total count/sum) per the definition of the CCDF as 1-ECDF, which has no point at 0. + ```python import plotly.express as px fig = px.ecdf(df, x=[1,2,3,4], markers=True, ecdfmode="complementary", - title="ecdfmode='complementary' (above X value)") + title="ecdfmode='complementary' (Y=fraction above X value)") fig.show() ``` @@ -140,6 +148,35 @@ fig = px.ecdf(df, x="total_bill", color="sex", markers=True, lines=False) fig.show() ``` +### Marginal Plots + +ECDF plots also support [marginal plots](https://plotly.com/python/marginal-plots/) + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", markers=True, lines=False, marginal="histogram") +fig.show() +``` + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", marginal="rug") +fig.show() +``` + +### Facets + +ECDF Plots also support [faceting](https://plotly.com/python/facet-plots/) + +```python +import plotly.express as px +df = px.data.tips() +fig = px.ecdf(df, x="total_bill", color="sex", facet_row="time", facet_col="day") +fig.show() +``` + ```python ``` diff --git a/doc/python/graph-objects.md b/doc/python/graph-objects.md index b2af9eba6b..936ea6d696 100644 --- a/doc/python/graph-objects.md +++ b/doc/python/graph-objects.md @@ -56,13 +56,19 @@ Graph objects have several benefits compared to plain Python dictionaries: 5. Graph object constructors and update methods accept "magic underscores" (e.g. `go.Figure(layout_title_text="The Title")` rather than `dict(layout=dict(title=dict(text="The Title")))`) for more compact code. 6. 
Graph objects support attached rendering (`.show()`) and exporting functions (`.write_image()`) that automatically invoke the appropriate functions from [the `plotly.io` module](https://plotly.com/python-api-reference/plotly.io.html). -### When to use Graph Objects Directly +### When to use Graph Objects vs Plotly Express -The recommended way to create figures is using the [functions in the plotly.express module](https://plotly.com/python-api-reference/), [collectively known as Plotly Express](/python/plotly-express/), which all return instances of `plotly.graph_objects.Figure`, so every figure produced with the plotly library, actually uses graph objects under the hood, unless manually constructed out of dictionaries. +The recommended way to create figures is using the [functions in the plotly.express module](https://plotly.com/python-api-reference/), [collectively known as Plotly Express](/python/plotly-express/), which all return instances of `plotly.graph_objects.Figure`, so every figure produced with the `plotly` library actually uses graph objects under the hood, unless manually constructed out of dictionaries. That said, certain kinds of figures are not yet possible to create with Plotly Express, such as figures that use certain 3D trace-types like [`mesh`](/python/3d-mesh/) or [`isosurface`](/python/3d-isosurface-plots/). In addition, certain figures are cumbersome to create by starting from a figure created with Plotly Express, for example figures with [subplots of different types](/python/mixed-subplots/), [dual-axis plots](/python/multiple-axes/), or [faceted plots](/python/facet-plots/) with multiple different types of traces. To construct such figures, it can be easier to start from an empty `plotly.graph_objects.Figure` object (or one configured with subplots via the [make_subplots() function](/python/subplots/)) and progressively add traces and update attributes as above. 
Every `plotly` documentation page lists the Plotly Express option at the top if a Plotly Express function exists to make the kind of chart in question, and then the graph objects version below. -Note that the figures produced by Plotly Express **in a single function-call** are [easy to customize at creation-time](/python/styling-plotly-express/), and to [manipulate after creation](/python/creating-and-updating-figures/) using the `update_*` and `add_*` methods. The figures produced by Plotly Express can always be built from the ground up using graph objects, but this approach typically takes **5-100 lines of code rather than 1**. Here is a simple example of how to produce the same figure object from the same data, once with Plotly Express and once without. The data in this example is in "long form" but [Plotly Express also accepts data in "wide form"](/python/wide-form/) and the line-count savings from Plotly Express over graph objects are comparable. More complex figures such as [sunbursts](/python/sunburst-charts/), [parallel coordinates](/python/parallel-coordinates-plot/), [facet plots](/python/facet-plots/) or [animations](/python/animations/) require many more lines of figure-specific graph objects code, whereas switching from one representation to another with Plotly Express usually involves changing just a few characters. +Note that the figures produced by Plotly Express **in a single function-call** are [easy to customize at creation-time](/python/styling-plotly-express/), and to [manipulate after creation](/python/creating-and-updating-figures/) using the `update_*` and `add_*` methods. + +### Comparing Graph Objects and Plotly Express + +The figures produced by Plotly Express can always be built from the ground up using graph objects, but this approach typically takes **5-100 lines of code rather than 1**. + +Here is a simple example of how to produce the same figure object from the same data, once with Plotly Express and once without. 
The data in this example is in "long form" but [Plotly Express also accepts data in "wide form"](/python/wide-form/) and the line-count savings from Plotly Express over graph objects are comparable. More complex figures such as [sunbursts](/python/sunburst-charts/), [parallel coordinates](/python/parallel-coordinates-plot/), [facet plots](/python/facet-plots/) or [animations](/python/animations/) require many more lines of figure-specific graph objects code, whereas switching from one representation to another with Plotly Express usually involves changing just a few characters. ```python import pandas as pd @@ -73,11 +79,17 @@ df = pd.DataFrame({ "Number Eaten": [2, 1, 3, 1, 3, 2], }) + +# Plotly Express + import plotly.express as px fig = px.bar(df, x="Fruit", y="Number Eaten", color="Contestant", barmode="group") fig.show() + +# Graph Objects + import plotly.graph_objects as go fig = go.Figure() @@ -88,4 +100,8 @@ fig.update_layout(legend_title_text = "Contestant") fig.update_xaxes(title_text="Fruit") fig.update_yaxes(title_text="Number Eaten") fig.show() -``` \ No newline at end of file +``` + +```python + +``` diff --git a/doc/python/histograms.md b/doc/python/histograms.md index 214e61d45b..6c49c64531 100644 --- a/doc/python/histograms.md +++ b/doc/python/histograms.md @@ -36,14 +36,19 @@ jupyter: thumbnail: thumbnail/histogram.jpg --- -In statistics, a [histogram](https://en.wikipedia.org/wiki/Histogram) is representation of the distribution of numerical data, where the data are binned and the count for each bin is represented. More generally, in plotly a histogram is an aggregated bar chart, with several possible aggregation functions (e.g. sum, average, count...). + +In statistics, a [histogram](https://en.wikipedia.org/wiki/Histogram) is a representation of the distribution of numerical data, where the data are binned and the count for each bin is represented. 
More generally, in Plotly a histogram is an aggregated bar chart, with several possible aggregation functions (e.g. sum, average, count...) which can be used to visualize data on categorical and date axes as well as linear axes. -If you're looking instead for bar charts, i.e. representing *raw, unaggregated* data with rectangular + +Alternatives to histograms for visualizing distributions include [violin plots](https://plotly.com/python/violin/), [box plots](https://plotly.com/python/box-plots/), [ECDF plots](https://plotly.com/python/ecdf-plots/) and [strip charts](https://plotly.com/python/strip-charts/). + +> If you're looking instead for bar charts, i.e. representing *raw, unaggregated* data with rectangular bar, go to the [Bar Chart tutorial](/python/bar-charts/). ## Histograms with Plotly Express [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). + ```python import plotly.express as px @@ -160,7 +165,7 @@ fig = px.histogram(df, x="total_bill", color="sex") fig.show() ``` -#### Using histfunc +#### Aggregating with other functions than `count` For each bin of `x`, one can compute a function of data using `histfunc`. The argument of `histfunc` is the dataframe column given as the `y` argument. Below the plot shows that the average tip increases with the total bill. @@ -193,7 +198,7 @@ fig.show() #### Visualizing the distribution -With the `marginal` keyword, a subplot is drawn alongside the histogram, visualizing the distribution. See [the distplot page](https://plotly.com/python/distplot/)for more examples of combined statistical representations. +With the `marginal` keyword, a [marginal](https://plotly.com/python/marginal-plots/) is drawn alongside the histogram, visualizing the distribution. 
See [the distplot page](https://plotly.com/python/distplot/) for more examples of combined statistical representations. ```python import plotly.express as px diff --git a/doc/python/line-and-scatter.md b/doc/python/line-and-scatter.md index b99afa56d5..cbdde08de6 100644 --- a/doc/python/line-and-scatter.md +++ b/doc/python/line-and-scatter.md @@ -67,6 +67,15 @@ fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species", fig.show() ``` +Color can be [continuous](https://plotly.com/python/colorscales/) as follows, or [discrete/categorical](https://plotly.com/python/discrete-color/) as above. + +```python +import plotly.express as px +df = px.data.iris() +fig = px.scatter(df, x="sepal_width", y="sepal_length", color='petal_length') +fig.show() +``` + The `symbol` argument can be mapped to a column as well. A [wide variety of symbols](https://plotly.com/python/marker-style/) are available. ```python @@ -104,6 +113,53 @@ fig.update_traces(marker_size=10) fig.show() ``` +### Error Bars + +Scatter plots support [error bars](https://plotly.com/python/error-bars/). + +```python +import plotly.express as px +df = px.data.iris() +df["e"] = df["sepal_width"]/100 +fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species", + error_x="e", error_y="e") +fig.show() +``` + +### Marginal Distribution Plots + +Scatter plots support [marginal distribution plots](https://plotly.com/python/marginal-plots/). + +```python +import plotly.express as px +df = px.data.iris() +fig = px.scatter(df, x="sepal_length", y="sepal_width", marginal_x="histogram", marginal_y="rug") +fig.show() +``` + +### Faceting + +Scatter plots support [faceting](https://plotly.com/python/facet-plots/). 
+ +```python +import plotly.express as px +df = px.data.tips() +fig = px.scatter(df, x="total_bill", y="tip", color="smoker", facet_col="sex", facet_row="time") +fig.show() +``` + +### Linear Regression and Other Trendlines + +Scatter plots support [linear and non-linear trendlines](https://plotly.com/python/linear-fits/). + +```python +import plotly.express as px + +df = px.data.tips() +fig = px.scatter(df, x="total_bill", y="tip", trendline="ols") +fig.show() +``` + ## Line plots with Plotly Express ```python diff --git a/doc/python/plotly-express.md b/doc/python/plotly-express.md index 2238c5ead0..269df0e093 100644 --- a/doc/python/plotly-express.md +++ b/doc/python/plotly-express.md @@ -43,10 +43,10 @@ Plotly Express provides [more than 30 functions for creating different types of Plotly Express currently includes the following functions: * **Basics**: [`scatter`](/python/line-and-scatter/), [`line`](/python/line-charts/), [`area`](/python/filled-area-plots/), [`bar`](/python/bar-charts/), [`funnel`](/python/funnel-charts/), [`timeline`](https://plotly.com/python/gantt/) -* **Part-of-Whole**: [`pie`](/python/pie-charts/), [`sunburst`](/python/sunburst-charts/), [`treemap`](/python/treemaps/), [`funnel_area`](/python/funnel-charts/) -* **1D Distributions**: [`histogram`](/python/histograms/), [`box`](/python/box-plots/), [`violin`](/python/violin/), [`strip`](/python/strip-charts/) +* **Part-of-Whole**: [`pie`](/python/pie-charts/), [`sunburst`](/python/sunburst-charts/), [`treemap`](/python/treemaps/), [`icicle`](/python/icicle-charts/), [`funnel_area`](/python/funnel-charts/) +* **1D Distributions**: [`histogram`](/python/histograms/), [`box`](/python/box-plots/), [`violin`](/python/violin/), [`strip`](/python/strip-charts/), [`ecdf`](/python/ecdf-plots/) * **2D Distributions**: [`density_heatmap`](/python/2D-Histogram/), [`density_contour`](/python/2d-histogram-contour/) -* **Matrix Input**: [`imshow`](/python/imshow/) +* **Matrix or Image Input**: 
[`imshow`](/python/imshow/) * **3-Dimensional**: [`scatter_3d`](/python/3d-scatter-plots/), [`line_3d`](/python/3d-line-plots/) * **Multidimensional**: [`scatter_matrix`](/python/splom/), [`parallel_coordinates`](/python/parallel-coordinates-plot/), [`parallel_categories`](/python/parallel-categories-diagram/) * **Tile Maps**: [`scatter_mapbox`](/python/scattermapbox/), [`line_mapbox`](/python/lines-on-mapbox/), [`choropleth_mapbox`](/python/mapbox-county-choropleth/), [`density_mapbox`](/python/mapbox-density-heatmaps/) @@ -61,8 +61,8 @@ The Plotly Express API in general offers the following features: * **A single entry point into `plotly`**: just `import plotly.express as px` and get access to [all the plotting functions](https://plotly.com/python-api-reference/plotly.express.html), plus [built-in demo datasets under `px.data`](https://plotly.com/python-api-reference/generated/plotly.data.html#module-plotly.data) and [built-in color scales and sequences under `px.color`](https://plotly.com/python-api-reference/generated/plotly.colors.html#module-plotly.colors). Every PX function returns a `plotly.graph_objects.Figure` object, so you can edit it using all the same methods like [`update_layout` and `add_trace`](https://plotly.com/python/creating-and-updating-figures/#updating-figures). * **Sensible, Overridable Defaults**: PX functions will infer sensible defaults wherever possible, and will always let you override them. * **Flexible Input Formats**: PX functions [accept input in a variety of formats](/python/px-arguments/), from `list`s and `dict`s to [long-form or wide-form Pandas `DataFrame`s](/python/wide-form/) to [`numpy` arrays and `xarrays`](/python/imshow/) to [GeoPandas `GeoDataFrames`](/python/maps/). -* **Automatic Trace and Layout configuration**: PX functions will create one [trace](/python/figure-structure) per animation frame for each unique combination of data values mapped to discrete color, symbol, line-dash, facet-row and/or facet-column. 
Traces' `legendgroup` and `showlegend` attributed are set such that only one legend item appears per unique combination of discrete color, symbol and/or line-dash. Traces are automatically linked to a correctly-configured [subplot of the appropriate type](/python/figure-structure). -* **Automatic Figure Labelling**: PX functions label axes, legends and colorbars based in the input `DataFrame` or `xarray`, and provide [extra control with the `labels` argument](/python/styling-plotly-express/). +* **Automatic Trace and Layout configuration**: PX functions will create one [trace](/python/figure-structure) per animation frame for each unique combination of data values mapped to discrete color, symbol, line-dash, facet-row and/or facet-column. Traces' [`legendgroup` and `showlegend` attributes](https://plotly.com/python/legend/) are set such that only one legend item appears per unique combination of discrete color, symbol and/or line-dash. Traces are automatically linked to a correctly-configured [subplot of the appropriate type](/python/figure-structure). +* **Automatic Figure Labelling**: PX functions [label axes, legends and colorbars](https://plotly.com/python/figure-labels/) based on the input `DataFrame` or `xarray`, and provide [extra control with the `labels` argument](/python/styling-plotly-express/). * **Automatic Hover Labels**: PX functions populate the hover-label using the labels mentioned above, and provide [extra control with the `hover_name` and `hover_data` arguments](/python/hover-text-and-formatting/). * **Styling Control**: PX functions [read styling information from the default figure template](/python/styling-plotly-express/), and support commonly-needed [cosmetic controls like `category_orders` and `color_discrete_map`](/python/styling-plotly-express/) to precisely control categorical variables. 
* **Uniform Color Handling**: PX functions automatically switch between [continuous](/python/colorscales/) and [categorical color](/python/discrete-color/) based on the input type. @@ -71,6 +71,7 @@ The Plotly Express API in general offers the following features: * **A Pandas backend**: the 2D-cartesian plotting functions are available as [a Pandas plotting backend](/python/pandas-backend/) so you can call them via `df.plot()`. * **Trendlines**: `px.scatter` supports [built-in trendlines with accessible model output](/python/linear-fits/). * **Animations**: many PX functions support [simple animation support via the `animation_frame` and `animation_group` arguments](/python/animations/). +* **Automatic WebGL switching**: for sufficiently large scatter plots, PX will automatically [use WebGL for hardware-accelerated rendering](https://plotly.com/python/webgl-vs-svg/). ### Plotly Express in Dash @@ -101,7 +102,7 @@ fig = px.scatter(df, x="sepal_width", y="sepal_length", color="species") fig.show() ``` -**Read more about [trendlines](/python/linear-fits/) and [templates](/python/templates/).** +**Read more about [trendlines](/python/linear-fits/) and [templates](/python/templates/) and [marginal distribution plots](https://plotly.com/python/marginal-plots/).** ```python import plotly.express as px @@ -218,6 +219,33 @@ fig = px.area(df, x="year", y="pop", color="continent", line_group="country") fig.show() ``` +**Read more about [timeline/Gantt charts](/python/gantt/).** + +```python +import plotly.express as px +import pandas as pd + +df = pd.DataFrame([ + dict(Task="Job A", Start='2009-01-01', Finish='2009-02-28', Resource="Alex"), + dict(Task="Job B", Start='2009-03-05', Finish='2009-04-15', Resource="Alex"), + dict(Task="Job C", Start='2009-02-20', Finish='2009-05-30', Resource="Max") +]) + +fig = px.timeline(df, x_start="Start", x_end="Finish", y="Resource", color="Resource") +fig.show() +``` + +**Read more about [funnel charts](/python/funnel-charts/).** + 
+```python +import plotly.express as px +data = dict( + number=[39, 27.4, 20.6, 11, 2], + stage=["Website visit", "Downloads", "Potential customers", "Requested price", "Invoice sent"]) +fig = px.funnel(data, x='number', y='stage') +fig.show() +``` + ### Part to Whole Charts **Read more about [pie charts](/python/pie-charts/).** @@ -252,7 +280,7 @@ fig = px.treemap(df, path=[px.Constant('world'), 'continent', 'country'], values fig.show() ``` -**Read more about [treemaps](/python/icicle-charts/).** +**Read more about [icicle charts](/python/icicle-charts/).** ```python import plotly.express as px @@ -292,26 +320,30 @@ fig = px.violin(df, y="tip", x="smoker", color="sex", box=True, points="all", ho fig.show() ``` +**Read more about [Empirical Cumulative Distribution Function (ECDF) charts](https://plotly.com/python/ecdf-plots/).** + ```python import plotly.express as px df = px.data.tips() -fig = px.strip(df, x="total_bill", y="time", orientation="h", color="smoker") +fig = px.ecdf(df, x="total_bill", color="sex") fig.show() ``` -**Read more about [density contours, also known as 2D histogram contours](/python/2d-histogram-contour/).** +**Read more about [strip charts](https://plotly.com/python/strip-charts/).** ```python import plotly.express as px -df = px.data.iris() -fig = px.density_contour(df, x="sepal_width", y="sepal_length") +df = px.data.tips() +fig = px.strip(df, x="total_bill", y="time", orientation="h", color="smoker") fig.show() ``` +**Read more about [density contours, also known as 2D histogram contours](/python/2d-histogram-contour/).** + ```python import plotly.express as px df = px.data.iris() -fig = px.density_contour(df, x="sepal_width", y="sepal_length", color="species", marginal_x="rug", marginal_y="histogram") +fig = px.density_contour(df, x="sepal_width", y="sepal_length") fig.show() ``` @@ -330,24 +362,25 @@ fig.show() ```python import plotly.express as px - -fig = px.imshow([[1, 20, 30], - [20, 1, 60], - [30, 60, 1]]) +data=[[1, 25, 30, 
50, 1], [20, 1, 60, 80, 30], [30, 60, 1, 5, 20]] +fig = px.imshow(data, + labels=dict(x="Day of Week", y="Time of Day", color="Productivity"), + x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'], + y=['Morning', 'Afternoon', 'Evening'] + ) +fig.update_xaxes(side="top") fig.show() ``` ```python import plotly.express as px -import numpy as np -img_rgb = np.array([[[255, 0, 0], [0, 255, 0], [0, 0, 255]], - [[0, 255, 0], [0, 0, 255], [255, 0, 0]] - ], dtype=np.uint8) -fig = px.imshow(img_rgb) +from skimage import io +img = io.imread('https://upload.wikimedia.org/wikipedia/commons/thumb/0/00/Crab_Nebula.jpg/240px-Crab_Nebula.jpg') +fig = px.imshow(img) fig.show() ``` -#### Maps +#### Tile Maps **Read more about [tile maps](/python/mapbox-layers/) and [point on tile maps](/python/scattermapbox/).** @@ -375,6 +408,8 @@ fig = px.choropleth_mapbox(df, geojson=geojson, color="Bergeron", fig.show() ``` +### Outline Maps + **Read more about [outline symbol maps](/python/scatter-plots-on-maps/).** ```python diff --git a/doc/python/violin.md b/doc/python/violin.md index bcdd44f1fe..8e9d2a4e4a 100644 --- a/doc/python/violin.md +++ b/doc/python/violin.md @@ -5,8 +5,8 @@ jupyter: text_representation: extension: .md format_name: markdown - format_version: '1.1' - jupytext_version: 1.1.1 + format_version: '1.2' + jupytext_version: 1.4.2 kernelspec: display_name: Python 3 language: python @@ -20,7 +20,7 @@ jupyter: name: python nbconvert_exporter: python pygments_lexer: ipython3 - version: 3.6.8 + version: 3.7.7 plotly: description: How to make violin plots in Python with Plotly. display_as: statistical @@ -36,15 +36,18 @@ jupyter: thumbnail: thumbnail/violin.jpg --- + ## Violin Plot with Plotly Express A [violin plot](https://en.wikipedia.org/wiki/Violin_plot) is a statistical representation of numerical data. 
It is similar to a [box plot](https://plotly.com/python/box-plots/), with the addition of a rotated [kernel density](https://en.wikipedia.org/wiki/Kernel_density_estimation) plot on each side. -See also the [list of other statistical charts](https://plotly.com/python/statistical-charts/). +Alternatives to violin plots for visualizing distributions include [histograms](https://plotly.com/python/histograms/), [box plots](https://plotly.com/python/box-plots/), [ECDF plots](https://plotly.com/python/ecdf-plots/) and [strip charts](https://plotly.com/python/strip-charts/). + ### Basic Violin Plot with Plotly Express [Plotly Express](/python/plotly-express/) is the easy-to-use, high-level interface to Plotly, which [operates on a variety of types of data](/python/px-arguments/) and produces [easy-to-style figures](/python/styling-plotly-express/). + ```python import plotly.express as px From 33381b1cf01c283cae73a6c61d23e7eca9ae3728 Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 12 Aug 2021 21:52:33 -0400 Subject: [PATCH 11/12] back out KDE --- .../python/plotly/plotly/express/__init__.py | 2 - .../plotly/plotly/express/_chart_types.py | 53 ------------------- .../python/plotly/plotly/express/_core.py | 22 +------- packages/python/plotly/plotly/express/_doc.py | 5 -- .../test_optional/test_px/test_marginals.py | 2 +- 5 files changed, 3 insertions(+), 81 deletions(-) diff --git a/packages/python/plotly/plotly/express/__init__.py b/packages/python/plotly/plotly/express/__init__.py index efda9cbe71..c8fab13b5f 100644 --- a/packages/python/plotly/plotly/express/__init__.py +++ b/packages/python/plotly/plotly/express/__init__.py @@ -35,7 +35,6 @@ strip, histogram, ecdf, - kde, scatter_matrix, parallel_coordinates, parallel_categories, @@ -92,7 +91,6 @@ "strip", "histogram", "ecdf", - "kde", "choropleth", "choropleth_mapbox", "pie", diff --git a/packages/python/plotly/plotly/express/_chart_types.py b/packages/python/plotly/plotly/express/_chart_types.py index 
490a00dafe..3e1f97fc05 100644 --- a/packages/python/plotly/plotly/express/_chart_types.py +++ b/packages/python/plotly/plotly/express/_chart_types.py @@ -555,59 +555,6 @@ def ecdf( ) -def kde( - data_frame=None, - x=None, - y=None, - color=None, - line_dash=None, - facet_row=None, - facet_col=None, - facet_col_wrap=0, - facet_row_spacing=None, - facet_col_spacing=None, - hover_name=None, - hover_data=None, - animation_frame=None, - animation_group=None, - category_orders=None, - labels=None, - color_discrete_sequence=None, - color_discrete_map=None, - line_dash_sequence=None, - line_dash_map=None, - marginal=None, - opacity=None, - orientation=None, - bw_method=None, - render_mode="auto", - log_x=False, - log_y=False, - range_x=None, - range_y=None, - title=None, - template=None, - width=None, - height=None, -): - """ - In a Kernel Density Estimation (KDE) plot, rows of `data_frame` are used as inputs - to a KDE smoothing function and a line is drawn with one point pre row of input. - """ - return make_figure(args=locals(), constructor=go.Scatter) - - -kde.__doc__ = make_docstring( - kde, - append_dict=dict( - x=["If `orientation` is `'h'`, this argument is used as KDE weights."] - + _wide_mode_xy_append, - y=["If `orientation` is `'v'`, this argument is used as KDE weights."] - + _wide_mode_xy_append, - ), -) - - def violin( data_frame=None, x=None, diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 0a20f6abc7..01c789ecab 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1318,9 +1318,7 @@ def build_dataframe(args, constructor): wide_cross_name = None # will likely be "index" in wide_mode value_name = None # will likely be "value" in wide_mode hist2d_types = [go.Histogram2d, go.Histogram2dContour] - hist1d_orientation = ( - constructor == go.Histogram or "ecdfmode" in args or "bw_method" in args - ) + hist1d_orientation = constructor == 
go.Histogram or "ecdfmode" in args if constructor in cartesians: if wide_x and wide_y: raise ValueError( @@ -1807,7 +1805,7 @@ def infer_config(args, constructor, trace_patch, layout_patch): trace_patch["marker"] = dict(opacity=args["opacity"]) if ( "line_group" in args or "line_dash" in args - ): # px.line, px.line_*, px.area, px.ecdf, px, kde + ): # px.line, px.line_*, px.area, px.ecdf modes = set() if args.get("lines", True): modes.add("lines") @@ -1878,9 +1876,6 @@ def infer_config(args, constructor, trace_patch, layout_patch): ) args["histnorm"] = args["ecdfnorm"] - if "bw_method" in args: - args["histnorm"] = "density" - # Compute applicable grouping attributes for k in group_attrables: if k in args: @@ -2113,19 +2108,6 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None): elif args["ecdfnorm"] == "percent": group[var] = 100.0 * group[var] / group_sum - if "bw_method" in args: - from scipy.stats import gaussian_kde - - base = args["x"] if args["orientation"] == "v" else args["y"] - var = args["x"] if args["orientation"] == "h" else args["y"] - bw = args.get("bw_method") - group = group.sort_values(by=base) - - kernel = gaussian_kde( - dataset=group[base], weights=group[var], bw_method=bw - ) - group[var] = kernel.evaluate(group[base]) - patch, fit_results = make_trace_kwargs( args, trace_spec, group, mapping_labels.copy(), sizeref ) diff --git a/packages/python/plotly/plotly/express/_doc.py b/packages/python/plotly/plotly/express/_doc.py index a961096338..7d5282ef07 100644 --- a/packages/python/plotly/plotly/express/_doc.py +++ b/packages/python/plotly/plotly/express/_doc.py @@ -591,11 +591,6 @@ "If `'complementary'`, the CCDF is plotted such that values represent data above the point.", "If `'reversed'`, a variant of the CCDF is plotted such that values represent data at or above the point.", ], - bw_method=[ - "str, scalar or callable (default `'scott'`)", - "If str, must be one of `'scott'` or `'silverman'`.", - "Passed to 
`scipy.stats.gaussian_kde`.", - ], ) diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py index a9277986f0..0274068d27 100644 --- a/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py +++ b/packages/python/plotly/plotly/tests/test_optional/test_px/test_marginals.py @@ -14,7 +14,7 @@ def test_xy_marginals(px_fn, marginal_x, marginal_y): assert len(fig.data) == 1 + (marginal_x is not None) + (marginal_y is not None) -@pytest.mark.parametrize("px_fn", [px.histogram, px.ecdf, px.kde]) +@pytest.mark.parametrize("px_fn", [px.histogram, px.ecdf]) @pytest.mark.parametrize("marginal", [None, "rug", "histogram", "box", "violin"]) @pytest.mark.parametrize("orientation", ["h", "v"]) def test_single_marginals(px_fn, marginal, orientation): From 787b16da0410fefbd56697fd046afc2dd597303a Mon Sep 17 00:00:00 2001 From: Nicolas Kruchten Date: Thu, 12 Aug 2021 21:53:30 -0400 Subject: [PATCH 12/12] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ddd0bd04c..e1a97c714b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). 
- `px.scatter` and `px.density_contours` now support new `trendline` types `'rolling'`, `'expanding'` and `'ewm'` [#2997](https://github.com/plotly/plotly.py/pull/2997) - `px.scatter` and `px.density_contours` now support new `trendline_options` argument to parameterize trendlines, with support for constant control and log-scaling in `'ols'` and specification of the fraction used for `'lowess'`, as well as pass-through to Pandas for `'rolling'`, `'expanding'` and `'ewm'` [#2997](https://github.com/plotly/plotly.py/pull/2997) - `px.scatter` and `px.density_contours` now support new `trendline_scope` argument that accepts the value `'overall'` to request a single trendline for all traces, including across facets and animation frames [#2997](https://github.com/plotly/plotly.py/pull/2997) + - A new `px.ecdf()` function for Empirical Cumulative Distribution Functions [#3330](https://github.com/plotly/plotly.py/pull/3330) ### Fixed - Fixed regression introduced in version 5.0.0 where pandas/numpy arrays with `dtype` of Object were being converted to `list` values when added to a Figure ([#3292](https://github.com/plotly/plotly.py/issues/3292), [#3293](https://github.com/plotly/plotly.py/pull/3293))