Fix minimum birth bug in plot_diagram #449

Merged: 3 commits, Aug 4, 2020
78 changes: 39 additions & 39 deletions gtda/mapper/pipeline.py
@@ -8,16 +8,16 @@
 from .utils._list_feature_union import ListFeatureUnion
 from .utils.pipeline import transformer_from_callable_on_rows, identity

-global_pipeline_params = ('memory', 'verbose')
-nodes_params = ('scaler', 'filter_func', 'cover')
-clust_prepr_params = ('clustering_preprocessing',)
-clust_params = ('clusterer', 'n_jobs',
-                'parallel_backend_prefer')
-nerve_params = ('min_intersection',)
-clust_prepr_params_prefix = 'pullback_cover__'
-nodes_params_prefix = 'pullback_cover__map_and_cover__'
-clust_params_prefix = 'clustering__'
-nerve_params_prefix = 'nerve__'
+global_pipeline_params = ("memory", "verbose")
+nodes_params = ("scaler", "filter_func", "cover")
+clust_prepr_params = ("clustering_preprocessing",)
+clust_params = ("clusterer", "n_jobs",
+                "parallel_backend_prefer")
+nerve_params = ("min_intersection", "store_edge_elements")
+clust_prepr_params_prefix = "pullback_cover__"
+nodes_params_prefix = "pullback_cover__map_and_cover__"
+clust_params_prefix = "clustering__"
+nerve_params_prefix = "nerve__"

Contributor Author (on the nerve_params line): This is important and was forgotten in #447.
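A minimal sketch of what the restored entry enables (hypothetical usage, not part of this diff; it assumes, as for the other Mapper-level parameters, that the un-prefixed name is translated to the prefixed "nerve__" parameter):

# Hypothetical usage sketch, not part of the diff.
from gtda.mapper import make_mapper_pipeline

pipe = make_mapper_pipeline()
print(pipe.get_mapper_params()["store_edge_elements"])  # False (the default)
pipe.set_params(store_edge_elements=True)  # assumed forwarded to the "nerve" step
print(pipe.get_mapper_params()["store_edge_elements"])  # True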


@@ -43,10 +43,10 @@ class MapperPipeline(Pipeline):
 >>> pipe = make_mapper_pipeline(filter_func=filter_func,
 ...                             cover=cover,
 ...                             clusterer=clusterer)
->>> print(pipe.get_mapper_params()['clusterer__eps'])
+>>> print(pipe.get_mapper_params()["clusterer__eps"])
 0.5
 >>> pipe.set_params(clusterer__eps=0.1)
->>> print(pipe.get_mapper_params()['clusterer__eps'])
+>>> print(pipe.get_mapper_params()["clusterer__eps"])
 0.1

See also
@@ -130,14 +130,14 @@ def _clean_dict_keys(kwargs, prefix):
         key[len(prefix):]: kwargs[key]
         for key in kwargs
         if (key.startswith(prefix)
-            and not key.startswith(prefix + 'steps')
-            and not key.startswith(prefix + 'memory')
-            and not key.startswith(prefix + 'verbose')
-            and not key.startswith(prefix + 'transformer_list')
-            and not key.startswith(prefix + 'n_jobs')
-            and not key.startswith(prefix + 'transformer_weights')
-            and not key.startswith(prefix + 'map_and_cover'))
-        }
+            and not key.startswith(prefix + "steps")
+            and not key.startswith(prefix + "memory")
+            and not key.startswith(prefix + "verbose")
+            and not key.startswith(prefix + "transformer_list")
+            and not key.startswith(prefix + "n_jobs")
+            and not key.startswith(prefix + "transformer_weights")
+            and not key.startswith(prefix + "map_and_cover"))
+        }
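To make the helper's behaviour concrete, a small sketch with made-up keyword arguments (illustrative only): keys under the given prefix are kept with the prefix stripped, while sklearn plumbing such as "memory" or "steps" is filtered out.

# Illustrative sketch with made-up values, not part of the diff.
kwargs = {"nerve__min_intersection": 1,
          "nerve__store_edge_elements": False,
          "nerve__memory": None,    # dropped: prefix + "memory" is excluded
          "clustering__n_jobs": 2}  # dropped: does not start with "nerve__"
print(_clean_dict_keys(kwargs, "nerve__"))
# {'min_intersection': 1, 'store_edge_elements': False}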


@@ -146,7 +146,7 @@ def make_mapper_pipeline(scaler=None,
                          clustering_preprocessing=None,
                          clusterer=None,
                          n_jobs=None,
-                         parallel_backend_prefer='threads',
+                         parallel_backend_prefer="threads",
                          graph_step=True,
                          min_intersection=1,
                          store_edge_elements=False,
@@ -199,12 +199,12 @@
     in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
     processors.

-    parallel_backend_prefer : ``'processes'`` | ``'threads'``, optional, \
-        default: ``'threads'``
+    parallel_backend_prefer : ``"processes"`` | ``"threads"``, optional, \
+        default: ``"threads"``
         Soft hint for the default joblib backend to use in a joblib-parallel
         application of the clustering step across pullback cover sets. To be
         used in conjunction with `n_jobs`. The default process-based backend is
-        'loky' and the default thread-based backend is 'threading'. See [2]_.
+        "loky" and the default thread-based backend is "threading". See [2]_.

graph_step : bool, optional, default: ``True``
Whether the resulting pipeline should stop at the calculation of the
@@ -257,11 +257,11 @@
 >>> print(mapper.__class__)
 <class 'gtda.mapper.pipeline.MapperPipeline'>
 >>> mapper_params = mapper.get_mapper_params()
->>> print(mapper_params['filter_func'].__class__)
+>>> print(mapper_params["filter_func"].__class__)
 <class 'sklearn.decomposition._pca.PCA'>
->>> print(mapper_params['cover'].__class__)
+>>> print(mapper_params["cover"].__class__)
 <class 'gtda.mapper.cover.CubicalCover'>
->>> print(mapper_params['clusterer'].__class__)
+>>> print(mapper_params["clusterer"].__class__)
 <class 'sklearn.cluster._dbscan.DBSCAN'>
 >>> X = np.random.random((10000, 4))  # 10000 points in 4-dimensional space
 >>> mapper_graph = mapper.fit_transform(X)  # Create the mapper graph
@@ -272,7 +272,7 @@
 ['pullback_set_label', 'partial_cluster_label', 'node_elements']
 >>> # Find which points belong to first node of graph
 >>> node_id = 0
->>> node_elements = mapper_graph.vs['node_elements']
+>>> node_elements = mapper_graph.vs["node_elements"]
 >>> print(f"Node Id: {node_id}, Node elements: {node_elements[node_id]}, "
 ...       f"Data points: {X[node_elements[node_id]]}")
 Node Id: 0,
@@ -372,7 +372,7 @@
     if filter_func is None:
         from sklearn.decomposition import PCA
         _filter_func = PCA(n_components=2)
-    elif not hasattr(filter_func, 'fit_transform'):
+    elif not hasattr(filter_func, "fit_transform"):
         _filter_func = transformer_from_callable_on_rows(filter_func)
     else:
         _filter_func = filter_func
@@ -395,26 +395,26 @@
         _clusterer = clusterer

     map_and_cover = Pipeline(
-        steps=[('scaler', _scaler),
-               ('filter_func', _filter_func),
-               ('cover', _cover)],
+        steps=[("scaler", _scaler),
+               ("filter_func", _filter_func),
+               ("cover", _cover)],
         verbose=verbose)

     all_steps = [
-        ('pullback_cover', ListFeatureUnion(
-            [('clustering_preprocessing', _clustering_preprocessing),
-             ('map_and_cover', map_and_cover)])),
-        ('clustering', ParallelClustering(
+        ("pullback_cover", ListFeatureUnion(
+            [("clustering_preprocessing", _clustering_preprocessing),
+             ("map_and_cover", map_and_cover)])),
+        ("clustering", ParallelClustering(
             clusterer=_clusterer,
             n_jobs=n_jobs,
             parallel_backend_prefer=parallel_backend_prefer))
         ]

     if graph_step:
         all_steps.append(
-            ('nerve', Nerve(min_intersection=min_intersection,
+            ("nerve", Nerve(min_intersection=min_intersection,
                             store_edge_elements=store_edge_elements))
             )

     mapper_pipeline = MapperPipeline(
         steps=all_steps, memory=memory, verbose=verbose)
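As a quick sanity check of the step layout assembled above (hypothetical usage; the step names are those appearing in the diff):

# Hypothetical usage sketch: inspect the step names of the assembled pipeline.
from gtda.mapper import make_mapper_pipeline

pipe = make_mapper_pipeline(graph_step=True)
print([name for name, _ in pipe.steps])
# ['pullback_cover', 'clustering', 'nerve']

pipe_no_graph = make_mapper_pipeline(graph_step=False)
print([name for name, _ in pipe_no_graph.steps])
# ['pullback_cover', 'clustering']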
36 changes: 18 additions & 18 deletions gtda/plotting/persistence_diagrams.py
@@ -37,23 +37,26 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
     homology_dimensions = np.unique(diagram[:, 2])

     diagram_no_dims = diagram[:, :2]
-    max_birth, max_death = np.max(
-        np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims),
-        axis=0
-    )
-    min_birth, min_death = np.max(
-        np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims),
-        axis=0
-    )
+    max_val_display = np.max(
+        np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims)
+    )
+    min_val_display = np.min(
+        np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims)
+    )
+    parameter_range = max_val_display - min_val_display
+    extra_space = 0.02 * parameter_range
+    min_val_display -= extra_space
+    max_val_display += extra_space

Contributor Author (on the max_val_display line): Taking the global maxima and minima makes the code ready for extended persistence.
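A standalone sketch of the masking logic above, with made-up values: +inf deaths are replaced by -inf before the global maximum is taken, and -inf births by +inf before the global minimum, so points at infinity cannot dominate the display range.

import numpy as np

# Made-up (birth, death) pairs with one -inf birth and one +inf death.
diagram_no_dims = np.array([[0.0, 1.5],
                            [-np.inf, 0.3],
                            [0.2, np.inf]])
max_val_display = np.max(
    np.where(np.isposinf(diagram_no_dims), -np.inf, diagram_no_dims))
min_val_display = np.min(
    np.where(np.isneginf(diagram_no_dims), np.inf, diagram_no_dims))
print(min_val_display, max_val_display)  # 0.0 1.5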

fig = gobj.Figure()
-    fig.add_trace(gobj.Scatter(x=[100 * min(-np.abs(max_death), min_birth),
-                                  100 * max_death],
-                               y=[100 * min(-np.abs(max_death), min_birth),
-                                  100 * max_death],
-                               mode="lines",
-                               line=dict(dash="dash", width=1, color="black"),
-                               showlegend=False, hoverinfo="none"))
+    fig.add_trace(gobj.Scatter(
+        x=[min_val_display, max_val_display],
+        y=[min_val_display, max_val_display],
+        mode="lines",
+        line={"dash": "dash", "width": 1, "color": "black"},
+        showlegend=False,
+        hoverinfo="none"
+    ))

ulupo (Contributor Author), Aug 4, 2020, on the new diagonal trace: This leads to better behaviour when clicking "Autoscale" on the plotly HTML object in a notebook. Currently, the factor of 100 means that autoscaling leads to huge ranges.
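A hypothetical notebook check of the fix (assuming plot_diagram is imported from gtda.plotting and renders the figure directly): with the diagonal drawn over the padded data range instead of 100x the extrema, "Autoscale" snaps back to a view that hugs the points.

import numpy as np
from gtda.plotting import plot_diagram

# Made-up diagram: columns are (birth, death, homology dimension).
diagram = np.array([[0.0, 0.5, 0.],
                    [0.2, 0.9, 1.]])
plot_diagram(diagram)  # the dashed diagonal now spans only the padded data range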

for dim in homology_dimensions:
name = f"H{int(dim)}" if dim != np.inf else "Any homology dimension"
@@ -63,17 +63,14 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
         fig.add_trace(gobj.Scatter(x=subdiagram[:, 0], y=subdiagram[:, 1],
                                    mode="markers", name=name))

-    parameter_range = max_death - min_birth
-    extra_space = 0.02 * parameter_range
-
     fig.update_layout(
         width=500,
         height=500,
         xaxis1={
             "title": "Birth",
             "side": "bottom",
             "type": "linear",
-            "range": [min_birth - extra_space, max_death + extra_space],
+            "range": [min_val_display, max_val_display],
             "autorange": False,
             "ticks": "outside",
             "showline": True,
@@ -88,7 +88,7 @@ def plot_diagram(diagram, homology_dimensions=None, plotly_params=None):
"title": "Death",
"side": "left",
"type": "linear",
"range": [min_birth - extra_space, max_death + extra_space],
"range": [min_val_display, max_val_display],
"autorange": False, "scaleanchor": "x", "scaleratio": 1,
"ticks": "outside",
"showline": True,
Expand Down
57 changes: 32 additions & 25 deletions gtda/plotting/point_clouds.py
@@ -58,7 +58,7 @@ def plot_point_cloud(point_cloud, dimension=None, plotly_params=None):
"zeroline": True,
"showexponent": "all",
"exponentformat": "e"
},
},
"yaxis1": {
"title": "1st",
"side": "left",
@@ -69,58 +69,65 @@ def plot_point_cloud(point_cloud, dimension=None, plotly_params=None):
"zeroline": True,
"showexponent": "all",
"exponentformat": "e"
},
},
"plot_bgcolor": "white"
}
}

fig = gobj.Figure(layout=layout)
-    fig.update_xaxes(zeroline=True, linewidth=1, linecolor='black',
+    fig.update_xaxes(zeroline=True, linewidth=1, linecolor="black",
                      mirror=False)
-    fig.update_yaxes(zeroline=True, linewidth=1, linecolor='black',
+    fig.update_yaxes(zeroline=True, linewidth=1, linecolor="black",
                      mirror=False)

-    fig.add_trace(gobj.Scatter(x=point_cloud[:, 0],
-                               y=point_cloud[:, 1],
-                               mode='markers',
-                               marker=dict(size=4,
-                                           color=list(range(
-                                               point_cloud.shape[0])),
-                                           colorscale='Viridis',
-                                           opacity=0.8)))
+    fig.add_trace(gobj.Scatter(
+        x=point_cloud[:, 0],
+        y=point_cloud[:, 1],
+        mode="markers",
+        marker={
+            "size": 4,
+            "color": list(range(point_cloud.shape[0])),
+            "colorscale": "Viridis",
+            "opacity": 0.8
+        }
+    ))

elif dimension == 3:
         scene = {
             "xaxis": {
                 "title": "0th",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "yaxis": {
                 "title": "1st",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
-                },
+            },
             "zaxis": {
                 "title": "2nd",
                 "type": "linear",
                 "showexponent": "all",
                 "exponentformat": "e"
-                }
-            }
+            }
+        }

fig = gobj.Figure()
fig.update_layout(scene=scene)

-        fig.add_trace(gobj.Scatter3d(x=point_cloud[:, 0],
-                                     y=point_cloud[:, 1],
-                                     z=point_cloud[:, 2],
-                                     mode='markers',
-                                     marker=dict(size=4,
-                                                 color=list(range(
-                                                     point_cloud.shape[0])),
-                                                 colorscale='Viridis',
-                                                 opacity=0.8)))
+        fig.add_trace(gobj.Scatter3d(
+            x=point_cloud[:, 0],
+            y=point_cloud[:, 1],
+            z=point_cloud[:, 2],
+            mode="markers",
+            marker={
+                "size": 4,
+                "color": list(range(point_cloud.shape[0])),
+                "colorscale": "Viridis",
+                "opacity": 0.8
+            }
+        ))
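The changes above are behaviour-preserving (double quotes and dict literals in place of dict() calls). For reference, a hypothetical usage sketch of the function being restyled:

import numpy as np
from gtda.plotting import plot_point_cloud

# Made-up data: a noisy circle in 2D; markers are coloured by point index.
rng = np.random.default_rng(42)
t = np.linspace(0, 2 * np.pi, 200)
point_cloud = np.column_stack([np.cos(t), np.sin(t)]) \
    + 0.05 * rng.normal(size=(200, 2))
plot_point_cloud(point_cloud)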

# Update trace and layout according to user input
if plotly_params: