New Estimators #13

Open: wants to merge 17 commits into master
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1,7 +1,7 @@
 include *.rst
 recursive-include doc *
 recursive-include examples *
-recursive-include sliced/datasets *.csv
+recursive-include sliced/datasets *.csv *.tes *.tra
 include README.rst
 include requirements.txt
 include test_requirements.txt
26 changes: 26 additions & 0 deletions examples/plot_iht.py
@@ -0,0 +1,26 @@
"""
================================
Iterative Hessian Transformation
================================

An example plot of :class:`sliced.iht.IterativeHessianTransformation`
applied to the Penn Digits Dataset.
"""
import matplotlib.pyplot as plt

from sliced import IterativeHessianTransformation
from sliced.datasets import load_penn_digits

X_train, y_train = load_penn_digits(subset='train', digits=[0, 6, 9])
X_test, y_test = load_penn_digits(subset='test', digits=[0, 6, 9])


iht = IterativeHessianTransformation(target_type='response').fit(X_train, y_train)
X_iht = iht.transform(X_test)

# plot data projected onto the first two directions
plt.scatter(X_iht[:, 0], X_iht[:, 1], c=y_test, linewidth=0.5, edgecolor='k')
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel(r"$X\hat{\beta_2}$")

plt.show()
36 changes: 36 additions & 0 deletions examples/plot_kir.py
@@ -0,0 +1,36 @@
"""
=========================
Kernel Inverse Regression
=========================

An example plot of :class:`sliced.kir.KernelInverseRegression`
"""
import numpy as np
import matplotlib.pyplot as plt

from sliced import KernelInverseRegression
from sliced import datasets


X, y = datasets.make_cubic(random_state=123)

kir = KernelInverseRegression(gamma=1e-2)
X_kir = kir.fit_transform(X, y)

# estimate of the first dimension-reducing direction
beta1_hat = kir.directions_[0, :]


# plot data projected onto the first direction
plt.scatter(X_kir[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel("y")

# annotation comparing the true direction with the estimated direction
beta_text = r"$\beta_1$ = " + "{0}".format([0.707, 0.707])
plt.annotate(beta_text, xy=(-2, 6.5))
beta1_hat_text = r"$\hat{\beta_1}$ = " + "{0}".format(
    np.round(beta1_hat, 3).tolist()[:2])
plt.annotate(beta1_hat_text, xy=(-2, 7.5))

plt.show()
35 changes: 35 additions & 0 deletions examples/plot_opg.py
@@ -0,0 +1,35 @@
"""
======================
Outer Product Gradient
======================

An example plot of :class:`sliced.opg.OuterProductGradients`
"""
import numpy as np
import matplotlib.pyplot as plt

from sliced import OuterProductGradients
from sliced import datasets

X, y = datasets.make_cubic(random_state=123)

opg = OuterProductGradients()
X_opg = opg.fit_transform(X, y)

# estimate of the first dimension-reducing direction
beta1_hat = opg.directions_[0, :]


# plot data projected onto the first direction
plt.scatter(X_opg[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel("y")

# annotation comparing the true direction with the estimated direction
beta_text = r"$\beta_1$ = " + "{0}".format([0.707, 0.707])
plt.annotate(beta_text, xy=(-2, 6.5))
beta1_hat_text = r"$\hat{\beta_1}$ = " + "{0}".format(
    np.round(beta1_hat, 3).tolist()[:2])
plt.annotate(beta1_hat_text, xy=(-2, 7.5))

plt.show()
24 changes: 24 additions & 0 deletions examples/plot_phd.py
@@ -0,0 +1,24 @@
"""
==================================
Principal Hessian Directions (PHD)
==================================

An example plot of :class:`sliced.phd.PrincipalHessianDirections`
"""
import matplotlib.pyplot as plt

from sliced import PrincipalHessianDirections
from sliced.datasets import load_athletes


X, y = load_athletes()

phd = PrincipalHessianDirections(target_type='residual')
X_phd = phd.fit_transform(X, y)

# plot data projected onto the first direction
plt.scatter(X_phd[:, 0], y, c=y, cmap='viridis', linewidth=0.5, edgecolor='k')
plt.xlabel(r"$X\hat{\beta_1}$")
plt.ylabel("y")

plt.show()
7 changes: 7 additions & 0 deletions sliced/__init__.py
@@ -8,5 +8,12 @@
 from sliced import externals

 from sliced.sir import *
+from sliced.kir import *
 from sliced.save import *
+from sliced.phd import *
+from sliced.iht import *
+from sliced.opg import *
+from sliced.dr import *
+from sliced.ladle import *
+from sliced.bic import *
 from sliced.version import *
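
With these re-exports the new estimators are importable from the package root, which is what the example scripts above rely on; for instance:

from sliced import (KernelInverseRegression, PrincipalHessianDirections,
                    IterativeHessianTransformation, OuterProductGradients,
                    BICDimensionSelector)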
53 changes: 53 additions & 0 deletions sliced/bic.py
@@ -0,0 +1,53 @@
import numpy as np

from sklearn.base import clone, BaseEstimator, MetaEstimatorMixin
from sklearn.base import TransformerMixin


__all__ = ['BICDimensionSelector']


class BICDimensionSelector(MetaEstimatorMixin, BaseEstimator, TransformerMixin):
    """Selects the number of directions for a sufficient dimension
    reduction estimator by maximizing a BIC-type criterion over the
    eigenvalues of the fitted estimator."""
    def __init__(self, estimator, criterion='log'):
        self.estimator = estimator
        self.criterion = criterion

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # fit the wrapped sufficient dimension reduction method
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X, y)

        evals = self.estimator_.eigenvalues_
        self.criterion_ = np.zeros(n_features + 1)
        if self.criterion == 'sum':
            # partial sums of the eigenvalues minus a linear penalty in k
            penalty = 2 * evals[0] * (n_samples ** -0.5) * np.log(n_samples)
            for k in range(n_features + 1):
                if k == 0:
                    self.criterion_[k] = 0
                else:
                    self.criterion_[k] = np.sum(evals[:k])
                self.criterion_[k] -= penalty * k
        elif self.criterion == 'log':
            # log-type criterion penalized by the number of free parameters
            penalty = (evals[0] / 3.) * (np.log(n_samples) + np.sqrt(n_samples))
            penalty /= (2. * n_samples)
            for k in range(n_features + 1):
                if k != n_features:
                    self.criterion_[k] = np.sum(np.log(evals[k:] + 1) - evals[k:])
                self.criterion_[k] -= penalty * k * (2 * n_features - k + 1)
        else:
            raise ValueError('Unrecognized criterion {}'.format(self.criterion))

        # the estimated dimension is the k that maximizes the criterion
        self.n_directions_ = np.argmax(self.criterion_)

        return self

    def transform(self, X):
        return self.estimator_.transform(X)
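
For review, a minimal usage sketch of the selector. Assumptions: `SlicedInverseRegression` is used as the wrapped estimator, and it retains the full eigenvalue spectrum in `eigenvalues_` after fitting, which is what `fit` above indexes:

from sliced import SlicedInverseRegression
from sliced import datasets
from sliced.bic import BICDimensionSelector

X, y = datasets.make_cubic(random_state=123)

# score every candidate dimension k = 0, ..., n_features and keep the argmax;
# assumes the fitted estimator exposes all n_features eigenvalues
selector = BICDimensionSelector(SlicedInverseRegression(), criterion='log')
selector.fit(X, y)
print(selector.n_directions_)  # estimated structural dimension
X_new = selector.transform(X)  # delegates to the fitted estimator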
1 change: 1 addition & 0 deletions sliced/datasets/__init__.py
@@ -1,3 +1,4 @@
 from sliced.datasets.synthetic import *
 from sliced.datasets.banknote import load_banknote
 from sliced.datasets.athletes import load_athletes
+from sliced.datasets.penn_digits import load_penn_digits