Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Update t-SNE widget #6345

Merged
merged 20 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
e20a54d
Update t-SNE defaults to match openTSNE
pavlin-policar Feb 20, 2023
e9c1293
owtsne: Allow user to disable PCA preprocessing
pavlin-policar Feb 21, 2023
659130d
owtsne: Warn if PCA disabled and large number of features
pavlin-policar Feb 21, 2023
9b0abb0
owtsne: Add support for spectral initialization
pavlin-policar Feb 21, 2023
62dc75a
owtsne: Add support for l1, cosine distances
pavlin-policar Feb 21, 2023
083ef11
owtsne: Fix label to be more consistent with rest of Orange
pavlin-policar Feb 21, 2023
febfcab
owtsne: Change exaggeration slider interval to 0.25
pavlin-policar Feb 21, 2023
be467ca
owtsne: Fix normalized_data not being invalidated properly
pavlin-policar Feb 21, 2023
99e39a3
owtsne: Add Distances signal
pavlin-policar Feb 22, 2023
64294a9
owtsne: normalize and pca preprocessing enabled by default
pavlin-policar Feb 23, 2023
c82f9bd
owtsne: remove _get_projection_data; updated in parent
pavlin-policar Feb 23, 2023
a9fca3f
owtsne: Disable preprocessing controls whenever distance matrix provided
pavlin-policar Feb 23, 2023
1e7bf33
owtsne: Fix DummyTSNE in tests
pavlin-policar Feb 23, 2023
c38af38
owtsne: Fixup enabled/disabled controls with distance matrix
pavlin-policar Feb 23, 2023
ef865df
owtsne: Add tooltips to disabled controls when distance matrix
pavlin-policar Feb 23, 2023
416707a
owtsne: Ensure data table-only settings properly restored
pavlin-policar Feb 27, 2023
360d6e4
owtsne: Update documentation
pavlin-policar Feb 28, 2023
ad26a7d
owtsne: Ensure tsne preprocessors are applied
pavlin-policar Sep 1, 2023
1da1967
owtsne: Ensure UI is consistent with underlying state
pavlin-policar Sep 22, 2023
34c79d3
owtsne: fix tests, use simulate util for combobox
pavlin-policar Oct 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions Orange/projection/manifold.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Union

import logging
import warnings
from collections.abc import Iterable
Expand All @@ -13,6 +15,7 @@
from Orange.data import Table, Domain, ContinuousVariable
from Orange.data.util import get_unique_names
from Orange.distance import Distance, DistanceModel, Euclidean
from Orange.misc import DistMatrix
from Orange.projection import SklProjector, Projector, Projection
from Orange.projection.base import TransformDomain, ComputeValueProjector

Expand Down Expand Up @@ -382,12 +385,12 @@ class TSNE(Projector):
Orange.preprocess.SklImpute(),
]

def __init__(self, n_components=2, perplexity=30, learning_rate=200,
def __init__(self, n_components=2, perplexity=30, learning_rate="auto",
early_exaggeration_iter=250, early_exaggeration=12,
n_iter=750, exaggeration=None, theta=0.5,
n_iter=500, exaggeration=None, theta=0.5,
min_num_intervals=10, ints_in_interval=1,
initialization="pca", metric="euclidean", n_jobs=1,
neighbors="exact", negative_gradient_method="bh",
neighbors="auto", negative_gradient_method="auto",
multiscale=False, callbacks=None, callbacks_every_iters=50,
random_state=None, preprocessors=None):
super().__init__(preprocessors=preprocessors)
Expand Down Expand Up @@ -461,6 +464,10 @@ def compute_initialization(self, X):
initialization = openTSNE.initialization.pca(
X, self.n_components, random_state=self.random_state
)
elif self.initialization == "spectral":
initialization = openTSNE.initialization.spectral(
X, self.n_components, random_state=self.random_state,
)
elif self.initialization == "random":
initialization = openTSNE.initialization.random(
X, self.n_components, random_state=self.random_state
Expand Down Expand Up @@ -498,7 +505,7 @@ def fit(self, X: np.ndarray, Y: np.ndarray = None) -> openTSNE.TSNEEmbedding:
# Run standard t-SNE optimization
embedding.optimize(
n_iter=self.early_exaggeration_iter, exaggeration=self.early_exaggeration,
inplace=True, momentum=0.5, propagate_exception=True,
inplace=True, momentum=0.8, propagate_exception=True,
)
embedding.optimize(
n_iter=self.n_iter, exaggeration=self.exaggeration,
Expand All @@ -507,17 +514,40 @@ def fit(self, X: np.ndarray, Y: np.ndarray = None) -> openTSNE.TSNEEmbedding:

return embedding

def convert_embedding_to_model(self, data, embedding):
def convert_embedding_to_model(self, data: Union[Table, DistMatrix], embedding: np.ndarray):
# The results should be accessible in an Orange table, which doesn't
# need the full embedding attributes and is cast into a regular array
n = self.n_components

if self.metric == "precomputed":
if not isinstance(data, DistMatrix):
raise ValueError(
f"Expected `data` to be instance of "
f"{DistMatrix.__class__.__name__} when using "
f"`metric='precomputed'. Got {data.__class__.__name__} "
f"instead!"
)
# The distance matrix need not come attached with the original data
if data.row_items is not None:
data = data.row_items
else:
data = Table.from_domain(Domain([]))

# Determine variable names
postfixes = ["x", "y"] if n == 2 else list(range(1, n + 1))
tsne_colnames = [f"t-SNE-{p}" for p in postfixes]
names = [var.name for var in chain(data.domain.class_vars, data.domain.metas) if var]
proposed = [(f"t-SNE-{p}") for p in postfixes]
uniq_names = get_unique_names(names, proposed)
tsne_cols = [ContinuousVariable(name) for name in uniq_names]
embedding_domain = Domain(tsne_cols, data.domain.class_vars, data.domain.metas)
embedding_table = Table(embedding_domain, embedding.view(np.ndarray), data.Y, data.metas)
tsne_colnames = get_unique_names(names, tsne_colnames)
tsne_cols = [ContinuousVariable(name) for name in tsne_colnames]

# Distance matrices need not come attached with the original data
if len(data.domain) == 0:
embedding_domain = Domain(tsne_cols)
embedding_table = Table(embedding_domain, embedding.view(np.ndarray))

else: # data table was available
embedding_domain = Domain(tsne_cols, data.domain.class_vars, data.domain.metas)
embedding_table = Table(embedding_domain, embedding.view(np.ndarray), data.Y, data.metas)

# Create a model object which will be capable of transforming new data
# into the existing embedding
Expand Down
Loading