From 8cea7e67f4a059ff5a3e0d3dace33c9c4a3db7e4 Mon Sep 17 00:00:00 2001
From: Umberto Lupo
Date: Tue, 4 Aug 2020 14:54:02 +0200
Subject: [PATCH 1/3] Fix bug introduced in
 4bc90b2e88926dbf2b4da2a455e7480485303d64

np.max should have been np.min in plot_diagram for the minimum birth and
death calculation
---
 gtda/plotting/persistence_diagrams.py | 17 ++++----
 gtda/plotting/point_clouds.py         | 57 +++++++++++++++------------
 2 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/gtda/plotting/persistence_diagrams.py b/gtda/plotting/persistence_diagrams.py
index ff35c7309..f76d270f4 100644
--- a/gtda/plotting/persistence_diagrams.py
+++ b/gtda/plotting/persistence_diagrams.py
@@ -41,19 +41,20 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
         np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims),
         axis=0
     )
-    min_birth, min_death = np.max(
+    min_birth, min_death = np.min(
         np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims),
         axis=0
     )
 
     fig = gobj.Figure()
-    fig.add_trace(gobj.Scatter(x=[100 * min(-np.abs(max_death), min_birth),
-                                  100 * max_death],
-                               y=[100 * min(-np.abs(max_death), min_birth),
-                                  100 * max_death],
-                               mode="lines",
-                               line=dict(dash="dash", width=1, color="black"),
-                               showlegend=False, hoverinfo="none"))
+    fig.add_trace(gobj.Scatter(
+        x=[100 * min(-np.abs(max_death), min_birth), 100 * max_death],
+        y=[100 * min(-np.abs(max_death), min_birth), 100 * max_death],
+        mode="lines",
+        line={"dash": "dash", "width": 1, "color": "black"},
+        showlegend=False,
+        hoverinfo="none"
+    ))
 
     for dim in homology_dimensions:
         name = f"H{int(dim)}" if dim != np.inf else "Any homology dimension"
diff --git a/gtda/plotting/point_clouds.py b/gtda/plotting/point_clouds.py
index 374a7f38b..0c96c7f4a 100644
--- a/gtda/plotting/point_clouds.py
+++ b/gtda/plotting/point_clouds.py
@@ -58,7 +58,7 @@ def plot_point_cloud(point_cloud, dimension=None, plotly_params=None):
                 "zeroline": True,
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "yaxis1": {
                 "title": "1st",
                 "side": "left",
                 "type": "linear",
                 "ticks": "outside",
                 "anchor": "x1",
                 "showline": True,
                 "zeroline": True,
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "plot_bgcolor": "white"
-            }
+        }
 
         fig = gobj.Figure(layout=layout)
-        fig.update_xaxes(zeroline=True, linewidth=1, linecolor='black',
+        fig.update_xaxes(zeroline=True, linewidth=1, linecolor="black",
                          mirror=False)
-        fig.update_yaxes(zeroline=True, linewidth=1, linecolor='black',
+        fig.update_yaxes(zeroline=True, linewidth=1, linecolor="black",
                          mirror=False)
-        fig.add_trace(gobj.Scatter(x=point_cloud[:, 0],
-                                   y=point_cloud[:, 1],
-                                   mode='markers',
-                                   marker=dict(size=4,
-                                               color=list(range(
-                                                   point_cloud.shape[0])),
-                                               colorscale='Viridis',
-                                               opacity=0.8)))
+        fig.add_trace(gobj.Scatter(
+            x=point_cloud[:, 0],
+            y=point_cloud[:, 1],
+            mode="markers",
+            marker={
+                "size": 4,
+                "color": list(range(point_cloud.shape[0])),
+                "colorscale": "Viridis",
+                "opacity": 0.8
+            }
+        ))
+
     elif dimension == 3:
         scene = {
             "xaxis": {
                 "title": "0th",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "yaxis": {
                 "title": "1st",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "zaxis": {
                 "title": "2nd",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
+            }
+        }
-                }
-            }
 
         fig = gobj.Figure()
         fig.update_layout(scene=scene)
-        fig.add_trace(gobj.Scatter3d(x=point_cloud[:, 0],
-                                     y=point_cloud[:, 1],
-                                     z=point_cloud[:, 2],
-                                     mode='markers',
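To see why np.min is needed: the two reductions are meant to find the largest and smallest finite birth/death values, masking out the infinities first. A minimal, self-contained sketch of that logic (NumPy only; the array below is a made-up stand-in for a diagram's birth/death columns, not library data):

import numpy as np

# Hypothetical (birth, death) pairs, including infinite entries as they
# can occur in persistence diagrams.
points = np.array([[0.0, 1.5],
                   [0.3, np.inf],
                   [-np.inf, 0.7]])

# Largest finite values per column: mask +inf with -inf so it cannot win
# the column-wise max.
max_birth, max_death = np.max(
    np.where(np.isposinf(points), -np.inf, points), axis=0)

# Smallest finite values per column: mask -inf with +inf so it cannot win
# the column-wise min. Using np.max here (the bug fixed above) would
# return the largest masked entries instead, corrupting the axis ranges.
min_birth, min_death = np.min(
    np.where(np.isneginf(points), np.inf, points), axis=0)

print(max_birth, max_death)  # 0.3 1.5
print(min_birth, min_death)  # 0.0 0.7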
-                                     marker=dict(size=4,
-                                                 color=list(range(
-                                                     point_cloud.shape[0])),
-                                                 colorscale='Viridis',
-                                                 opacity=0.8)))
+        fig.add_trace(gobj.Scatter3d(
+            x=point_cloud[:, 0],
+            y=point_cloud[:, 1],
+            z=point_cloud[:, 2],
+            mode="markers",
+            marker={
+                "size": 4,
+                "color": list(range(point_cloud.shape[0])),
+                "colorscale": "Viridis",
+                "opacity": 0.8
+            }
+        ))
 
     # Update trace and layout according to user input
     if plotly_params:

From cec4a5cc7db9e240e1fe6b7222af1292be8591d4 Mon Sep 17 00:00:00 2001
From: Umberto Lupo
Date: Tue, 4 Aug 2020 15:07:42 +0200
Subject: [PATCH 2/3] Clean up and simplify plot_diagram further, in
 preparation for extended persistence

---
 gtda/plotting/persistence_diagrams.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/gtda/plotting/persistence_diagrams.py b/gtda/plotting/persistence_diagrams.py
index f76d270f4..2a6c33d09 100644
--- a/gtda/plotting/persistence_diagrams.py
+++ b/gtda/plotting/persistence_diagrams.py
@@ -37,19 +37,21 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
         homology_dimensions = np.unique(diagram[:, 2])
 
     diagram_no_dims = diagram[:, :2]
-    max_birth, max_death = np.max(
-        np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims),
-        axis=0
+    max_val_display = np.max(
+        np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims)
     )
-    min_birth, min_death = np.min(
-        np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims),
-        axis=0
+    min_val_display = np.min(
+        np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims)
     )
+    parameter_range = max_val_display - min_val_display
+    extra_space = 0.02 * parameter_range
+    min_val_display -= extra_space
+    max_val_display += extra_space
 
     fig = gobj.Figure()
     fig.add_trace(gobj.Scatter(
-        x=[100 * min(-np.abs(max_death), min_birth), 100 * max_death],
-        y=[100 * min(-np.abs(max_death), min_birth), 100 * max_death],
+        x=[min_val_display, max_val_display],
+        y=[min_val_display, max_val_display],
         mode="lines",
         line={"dash": "dash", "width": 1, "color": "black"},
         showlegend=False,
@@ -64,9 +66,6 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
         fig.add_trace(gobj.Scatter(x=subdiagram[:, 0], y=subdiagram[:, 1],
                                    mode="markers", name=name))
 
-    parameter_range = max_death - min_birth
-    extra_space = 0.02 * parameter_range
-
     fig.update_layout(
         width=500,
         height=500,
@@ -74,7 +73,7 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
             "title": "Birth",
            "side": "bottom",
             "type": "linear",
-            "range": [min_birth - extra_space, max_death + extra_space],
+            "range": [min_val_display, max_val_display],
             "autorange": False,
             "ticks": "outside",
             "showline": True,
@@ -89,7 +88,7 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
             "title": "Death",
             "side": "left",
             "type": "linear",
-            "range": [min_birth - extra_space, max_death + extra_space],
+            "range": [min_val_display, max_val_display],
             "autorange": False, "scaleanchor": "x", "scaleratio": 1,
             "ticks": "outside", "showline": True,
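The cleanup above makes min_val_display/max_val_display the single source of truth for the plot range, computed once before any trace is drawn. A runnable sketch of the new computation (the input array is illustrative, not from the library):

import numpy as np

# Stand-in for diagram[:, :2]: birth and death columns of a diagram.
diagram_no_dims = np.array([[0.0, 0.4],
                            [0.2, 0.9],
                            [0.5, 1.0]])

# Global (not column-wise) finite extrema: note the absence of axis=0.
max_val_display = np.max(
    np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims))
min_val_display = np.min(
    np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims))

# Pad both ends by 2% of the parameter range, as the patch does, so that
# points and the dashed diagonal do not sit exactly on the plot frame.
parameter_range = max_val_display - min_val_display
extra_space = 0.02 * parameter_range
min_val_display -= extra_space
max_val_display += extra_space

print(min_val_display, max_val_display)  # -0.02 1.02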
From 1858e6fc65616b76fe7cd0a36525c982fa236592 Mon Sep 17 00:00:00 2001
From: Umberto Lupo
Date: Tue, 4 Aug 2020 15:29:27 +0200
Subject: [PATCH 3/3] Make `store_edge_elements` work with
 MapperPipeline.get_mapper_params and set_params

---
 gtda/mapper/pipeline.py | 78 ++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/gtda/mapper/pipeline.py b/gtda/mapper/pipeline.py
index 9105714db..5eb1a7758 100644
--- a/gtda/mapper/pipeline.py
+++ b/gtda/mapper/pipeline.py
@@ -8,16 +8,16 @@ from .utils._list_feature_union import ListFeatureUnion
 from .utils.pipeline import transformer_from_callable_on_rows, identity
 
-global_pipeline_params = ('memory', 'verbose')
-nodes_params = ('scaler', 'filter_func', 'cover')
-clust_prepr_params = ('clustering_preprocessing',)
-clust_params = ('clusterer', 'n_jobs',
-                'parallel_backend_prefer')
-nerve_params = ('min_intersection',)
-clust_prepr_params_prefix = 'pullback_cover__'
-nodes_params_prefix = 'pullback_cover__map_and_cover__'
-clust_params_prefix = 'clustering__'
-nerve_params_prefix = 'nerve__'
+global_pipeline_params = ("memory", "verbose")
+nodes_params = ("scaler", "filter_func", "cover")
+clust_prepr_params = ("clustering_preprocessing",)
+clust_params = ("clusterer", "n_jobs",
+                "parallel_backend_prefer")
+nerve_params = ("min_intersection", "store_edge_elements")
+clust_prepr_params_prefix = "pullback_cover__"
+nodes_params_prefix = "pullback_cover__map_and_cover__"
+clust_params_prefix = "clustering__"
+nerve_params_prefix = "nerve__"
 
 
 class MapperPipeline(Pipeline):
@@ -43,10 +43,10 @@ class MapperPipeline(Pipeline):
     >>> pipe = make_mapper_pipeline(filter_func=filter_func,
     ...                             cover=cover,
    ...                             clusterer=clusterer)
-    >>> print(pipe.get_mapper_params()['clusterer__eps'])
+    >>> print(pipe.get_mapper_params()["clusterer__eps"])
     0.5
     >>> pipe.set_params(clusterer__eps=0.1)
-    >>> print(pipe.get_mapper_params()['clusterer__eps'])
+    >>> print(pipe.get_mapper_params()["clusterer__eps"])
     0.1
 
     See also
@@ -130,14 +130,14 @@ def _clean_dict_keys(kwargs, prefix):
         key[len(prefix):]: kwargs[key]
         for key in kwargs
         if (key.startswith(prefix)
-            and not key.startswith(prefix + 'steps')
-            and not key.startswith(prefix + 'memory')
-            and not key.startswith(prefix + 'verbose')
-            and not key.startswith(prefix + 'transformer_list')
-            and not key.startswith(prefix + 'n_jobs')
-            and not key.startswith(prefix + 'transformer_weights')
-            and not key.startswith(prefix + 'map_and_cover'))
-        }
+            and not key.startswith(prefix + "steps")
+            and not key.startswith(prefix + "memory")
+            and not key.startswith(prefix + "verbose")
+            and not key.startswith(prefix + "transformer_list")
+            and not key.startswith(prefix + "n_jobs")
+            and not key.startswith(prefix + "transformer_weights")
+            and not key.startswith(prefix + "map_and_cover"))
+    }
 
 
 def make_mapper_pipeline(scaler=None,
                          filter_func=None,
                          cover=None,
                          clustering_preprocessing=None,
                          clusterer=None,
                          n_jobs=None,
-                         parallel_backend_prefer='threads',
+                         parallel_backend_prefer="threads",
                          graph_step=True,
                          min_intersection=1,
                          store_edge_elements=False,
@@ -199,12 +199,12 @@ def make_mapper_pipeline(scaler=None,
         in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
         processors.
 
-    parallel_backend_prefer : ``'processes'`` | ``'threads'``, optional, \
-        default: ``'threads'``
+    parallel_backend_prefer : ``"processes"`` | ``"threads"``, optional, \
+        default: ``"threads"``
         Soft hint for the default joblib backend to use in a joblib-parallel
         application of the clustering step across pullback cover sets. To be
         used in conjunction with `n_jobs`. The default process-based backend is
-        'loky' and the default thread-based backend is 'threading'. See [2]_.
+        "loky" and the default thread-based backend is "threading". See [2]_.
    graph_step : bool, optional, default: ``True``
        Whether the resulting pipeline should stop at the calculation of the
 
@@ -257,11 +257,11 @@ def make_mapper_pipeline(scaler=None,
     >>> print(mapper.__class__)
     <class 'gtda.mapper.pipeline.MapperPipeline'>
     >>> mapper_params = mapper.get_mapper_params()
-    >>> print(mapper_params['filter_func'].__class__)
+    >>> print(mapper_params["filter_func"].__class__)
     <class 'sklearn.decomposition._pca.PCA'>
-    >>> print(mapper_params['cover'].__class__)
+    >>> print(mapper_params["cover"].__class__)
     <class 'gtda.mapper.cover.CubicalCover'>
-    >>> print(mapper_params['clusterer'].__class__)
+    >>> print(mapper_params["clusterer"].__class__)
     <class 'sklearn.cluster._dbscan.DBSCAN'>
     >>> X = np.random.random((10000, 4))  # 10000 points in 4-dimensional space
     >>> mapper_graph = mapper.fit_transform(X)  # Create the mapper graph
@@ -272,7 +272,7 @@ def make_mapper_pipeline(scaler=None,
     ['pullback_set_label', 'partial_cluster_label', 'node_elements']
     >>> # Find which points belong to first node of graph
     >>> node_id = 0
-    >>> node_elements = mapper_graph.vs['node_elements']
+    >>> node_elements = mapper_graph.vs["node_elements"]
     >>> print(f"Node ID: {node_id}, Node elements: {node_elements[node_id]}, "
     ...       f"Data points: {X[node_elements[node_id]]}")
     Node ID: 0,
@@ -372,7 +372,7 @@ def make_mapper_pipeline(scaler=None,
     if filter_func is None:
         from sklearn.decomposition import PCA
         _filter_func = PCA(n_components=2)
-    elif not hasattr(filter_func, 'fit_transform'):
+    elif not hasattr(filter_func, "fit_transform"):
         _filter_func = transformer_from_callable_on_rows(filter_func)
     else:
         _filter_func = filter_func
@@ -395,26 +395,26 @@ def make_mapper_pipeline(scaler=None,
         _clusterer = clusterer
 
     map_and_cover = Pipeline(
-        steps=[('scaler', _scaler),
-               ('filter_func', _filter_func),
-               ('cover', _cover)],
+        steps=[("scaler", _scaler),
+               ("filter_func", _filter_func),
+               ("cover", _cover)],
         verbose=verbose)
 
     all_steps = [
-        ('pullback_cover', ListFeatureUnion(
-            [('clustering_preprocessing', _clustering_preprocessing),
-             ('map_and_cover', map_and_cover)])),
-        ('clustering', ParallelClustering(
+        ("pullback_cover", ListFeatureUnion(
+            [("clustering_preprocessing", _clustering_preprocessing),
+             ("map_and_cover", map_and_cover)])),
+        ("clustering", ParallelClustering(
             clusterer=_clusterer,
             n_jobs=n_jobs,
             parallel_backend_prefer=parallel_backend_prefer))
-        ]
+    ]
 
     if graph_step:
         all_steps.append(
-            ('nerve', Nerve(min_intersection=min_intersection,
+            ("nerve", Nerve(min_intersection=min_intersection,
                             store_edge_elements=store_edge_elements))
-        )
+            )
 
     mapper_pipeline = MapperPipeline(
         steps=all_steps, memory=memory, verbose=verbose)
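With "store_edge_elements" added to nerve_params, the parameter now round-trips through the pipeline's parameter API in the same way min_intersection does: get_mapper_params exposes it under its plain name (the "nerve__" prefix is stripped by _clean_dict_keys), and set_params routes it back to the Nerve step. A usage sketch, assuming the giotto-tda API as of this patch (the printed values are what the code paths above imply, not captured output):

from gtda.mapper import make_mapper_pipeline

pipe = make_mapper_pipeline(store_edge_elements=False)
# The nerve parameter appears under its unprefixed name.
print(pipe.get_mapper_params()["store_edge_elements"])  # False

# set_params forwards the value to the Nerve step via the "nerve__" prefix.
pipe.set_params(store_edge_elements=True)
print(pipe.get_mapper_params()["store_edge_elements"])  # True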