Version - 0.0.20 #23

Merged · 6 commits · Jan 30, 2024
2 changes: 1 addition & 1 deletion IngeoML/__init__.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = '0.0.19'
+__version__ = '0.0.20'

from IngeoML.bootstrap import CI, SE, StatisticSamples
from IngeoML.feature_selection import SelectFromModelCV
46 changes: 28 additions & 18 deletions IngeoML/optimizer.py
@@ -16,10 +16,12 @@
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
+from scipy.sparse import spmatrix
import numpy as np
import jax
import jax.numpy as jnp
from jax import nn
+from jax.experimental.sparse import BCSR
import optax
from IngeoML.utils import Batches, balance_class_weights, progress_bar, soft_BER, cos_distance, cos_similarity

@@ -163,7 +165,6 @@ def estimator(parameters: object,
model: Callable[[object, jnp.array], jnp.array],
X, y,
batches: Batches=None,
-              array: Callable[[object],object]=jnp.array,
class_weight: str='balanced',
n_iter_no_change: int=jnp.inf,
deviation=None, n_outputs: int=None, validation=None,
@@ -179,7 +180,6 @@ def estimator(parameters: object,
:param y: Dependent variable.
:param batches: Batches used in the optimization.
:type batches: :py:class:`~IngeoML.utils.Batches`
-    :param array: Function to transform the independent variable.
:param class_weight: Element weights.
:param n_iter_no_change: Number of iterations without improving the performance.
:type n_iter_no_change: int
@@ -229,9 +229,9 @@ def deviation_regression(params, X, y, weights, *args):
hy = model(params, X, *args)
return deviation(y, hy, weights)

-    def encode(y, n_outputs, validation):
-        if n_outputs == 1:
-            labels = np.unique(y)
+    def encode(y, validation):
+        labels = np.unique(y)
+        if labels.shape[0] == 2:
h = {v:k for k, v in enumerate(labels)}
y_enc = np.array([h[x] for x in y])
if validation is not None and not hasattr(validation, 'split'):
@@ -244,6 +244,11 @@ def encode(y, n_outputs, validation):
_ = validation[1]
validation[1] = encoder.transform(_.reshape(-1, 1))
return y_enc

+    def array(data):
+        if isinstance(data, spmatrix):
+            return BCSR.from_scipy_sparse(data)
+        return jnp.array(data)

def create_batches(batches):
if batches is None:
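Review note (illustration, not part of the diff): dropping the array parameter works because the new helper above picks the conversion automatically: scipy sparse input becomes a JAX BCSR array, and anything else goes through jnp.array as before, so callers that previously passed array= can simply drop the argument. A minimal standalone sketch of that behavior, assuming only jax and scipy:

    import numpy as np
    import jax.numpy as jnp
    from jax.experimental.sparse import BCSR
    from scipy.sparse import csr_matrix, spmatrix

    def array(data):
        # Mirror of the helper in the diff: sparse in, BCSR out;
        # anything else is converted with jnp.array.
        if isinstance(data, spmatrix):
            return BCSR.from_scipy_sparse(data)
        return jnp.array(data)

    print(type(array(csr_matrix(np.eye(3)))))  # JAX BCSR sparse array
    print(type(array(np.eye(3))))              # regular JAX Array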
@@ -265,17 +270,18 @@ def create_batches(batches):
batches_.append(tuple(args))
return batches_

-    def _validation(validation, X, y_enc, y):
+    def _validation(validation, X, y_enc, y, model_args):
if validation is not None and hasattr(validation, 'split'):
tr, vs = next(validation.split(X, y))
validation = [array(X[vs]), jnp.array(y_enc[vs])]
+            if model_args is not None:
+                validation += [array(x[vs]) for x in model_args]
+                model_args = [x[tr] for x in model_args]
X, y_enc = X[tr], y_enc[tr]
y = y[tr]
elif validation is not None and not hasattr(validation, 'split'):
validation = [array(validation[0]), jnp.array(validation[1])] + [array(x) for x in validation[2:]]
-        return validation, X, y_enc, y
+        return validation, X, y_enc, y, model_args

def _objective(deviation):
if not classifier:
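Review note (illustration, not part of the diff): the point of threading model_args through _validation is that every extra model input is split with the same train/validation indices as X and y. A rough standalone sketch of that bookkeeping, with illustrative variable names:

    import numpy as np
    from sklearn.model_selection import StratifiedShuffleSplit

    X = np.random.rand(20, 4)
    X2 = np.random.rand(20, 3)            # stand-in for one model_args entry
    y = np.array([0, 1] * 10)
    splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.3)
    tr, vs = next(splitter.split(X, y))
    validation = [X[vs], y[vs], X2[vs]]   # validation side carries model_args too
    X, y, X2 = X[tr], y[tr], X2[tr]       # training side sliced with the same tr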
@@ -298,19 +304,25 @@ def _objective(deviation):
validation = StratifiedShuffleSplit(n_splits=1, test_size=test_size)
else:
validation = ShuffleSplit(n_splits=1, test_size=test_size)
-    if n_outputs is None:
-        args = ()
-        if model_args is not None:
-            args = (array(x[:1]) for x in model_args)
-        n_outputs = model(parameters,
-                          array(X[:1]), *args).shape[-1]

if classifier:
-        y_enc = encode(y, n_outputs, validation)
+        y_enc = encode(y, validation)
else:
y_enc = y
-    validation, X, y_enc, y = _validation(validation, X, y_enc, y)
+    validation, X, y_enc, y, model_args = _validation(validation, X,
+                                                      y_enc, y, model_args)
+    if n_outputs is None:
+        if y_enc.ndim == 1:
+            n_outputs = 1
+        else:
+            n_outputs = y_enc.shape[-1]
batches_ = create_batches(batches)
objective, deviation = _objective(deviation)
+    if callable(parameters):
+        if model_args is not None:
+            parameters = parameters(X, y_enc, *model_args)
+        else:
+            parameters = parameters(X, y_enc)
return optimize(parameters, batches_, objective,
n_iter_no_change=n_iter_no_change,
validation=validation, model=model,
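Review note (illustration, not part of the diff): n_outputs is now inferred from the encoded target instead of a dry run through the model. Together with the new encode, two labels yield a 1-D 0/1 vector (n_outputs = 1), while three or more presumably yield a one-hot matrix whose last dimension is n_outputs, matching the OneHotEncoder used for the validation targets. A small illustration, assuming scikit-learn >= 1.2 for sparse_output:

    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    # Two labels: encode maps them to a 1-D integer vector, as in the
    # binary branch above, so y_enc.ndim == 1 and n_outputs == 1.
    y2 = np.array(['neg', 'pos', 'neg', 'pos'])
    h = {v: k for k, v in enumerate(np.unique(y2))}
    y_enc = np.array([h[x] for x in y2])
    assert y_enc.ndim == 1

    # Three labels: a one-hot matrix, so n_outputs == y_enc.shape[-1] == 3.
    y3 = np.array([0, 1, 2, 1]).reshape(-1, 1)
    y3_enc = OneHotEncoder(sparse_output=False).fit_transform(y3)
    assert y3_enc.shape[-1] == 3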
@@ -322,7 +334,6 @@ def classifier(parameters: object,
model: Callable[[object, jnp.array], jnp.array],
X, y,
batches: Batches=None,
-               array: Callable[[object],object]=jnp.array,
class_weight: str='balanced',
deviation=None, n_outputs: int=None, validation=None,
discretize_val: bool= True,
@@ -338,7 +349,6 @@ def classifier(parameters: object,
:param y: Dependent variable.
:param batches: Batches used in the optimization.
:type batches: :py:class:`~IngeoML.utils.Batches`
-    :param array: Function to transform the independent variable.
:param class_weight: Element weights.
:param deviation: Deviation function between the actual and predicted values.
:param n_output: Number of outputs.
@@ -371,7 +381,7 @@ def classifier(parameters: object,
"""

return estimator(parameters, model, X, y, batches=batches,
-                     array=array, class_weight=class_weight,
+                     class_weight=class_weight,
deviation=deviation, n_outputs=n_outputs,
validation=validation, discretize_val=discretize_val,
every_k_schedule=every_k_schedule,
50 changes: 42 additions & 8 deletions IngeoML/tests/test_optimizer.py
@@ -101,13 +101,15 @@ def modelo(params, X):
return Y

X, y = load_wine(return_X_y=True)
-    index = np.arange(X.shape[0])
-    np.random.shuffle(index)
-    m = LinearSVC(dual='auto').fit(X[index[:100]], y[:100])
+    st = StratifiedShuffleSplit(n_splits=1, train_size=10,
+                                random_state=0)
+    tr, _ = next(st.split(X, y))
+    m = LinearSVC(dual='auto').fit(X[tr], y[tr])
parameters = dict(W=jnp.array(m.coef_.T),
W0=jnp.array(m.intercept_))
p, evol = classifier(parameters, modelo, X, y,
# learning_rate=1e-3,
+                         n_iter_no_change=10,
return_evolution=True)
evol = np.array([x[1] for x in evol])
assert np.any(np.diff(evol) != 0)
@@ -118,6 +120,7 @@ def modelo(params, X):
parameters = dict(W=jnp.array(m.coef_.T),
W0=jnp.array(m.intercept_))
p2, evol = classifier(parameters, modelo, X, y,
+                          n_iter_no_change=10,
return_evolution=True)
evol = np.array([x[1] for x in evol])
assert np.any(np.diff(evol) != 0)
@@ -134,19 +137,50 @@ def modelo(params, X, X2):
return Y

X, y = load_wine(return_X_y=True)
-    index = np.arange(X.shape[0])
-    np.random.shuffle(index)
-    m = LinearSVC(dual='auto').fit(X[index[:100]], y[:100])
+    st = StratifiedShuffleSplit(n_splits=1, train_size=10,
+                                random_state=0)
+    tr, _ = next(st.split(X, y))
+    m = LinearSVC(dual='auto').fit(X[tr], y[tr])
parameters = dict(W=jnp.array(m.coef_.T),
W0=jnp.array(m.intercept_))
p, evol = classifier(parameters, modelo, X, y,
# learning_rate=1e-3,
+                         n_iter_no_change=10,
return_evolution=True,
model_args=(X,))
evol = np.array([x[1] for x in evol])
assert np.any(np.diff(evol) != 0)


+def test_classifier_callable_parameter():
+    """Classifier optimize with jax"""
+    from sklearn.metrics import recall_score
+    from sklearn.datasets import load_wine
+
+    @jax.jit
+    def modelo(params, X, X2):
+        Y = X2 @ params['W'] + params['W0']
+        return Y
+
+    def initial_parameters(X, y, X2):
+        y = y.argmax(axis=1)
+        st = StratifiedShuffleSplit(n_splits=1, train_size=10,
+                                    random_state=0)
+        tr, _ = next(st.split(X2, y))
+        m = LinearSVC(dual='auto').fit(X2[tr], y[tr])
+        parameters = dict(W=jnp.array(m.coef_.T),
+                          W0=jnp.array(m.intercept_))
+        return parameters
+
+    X, y = load_wine(return_X_y=True)
+    p, evol = classifier(initial_parameters, modelo, X, y,
+                         return_evolution=True,
+                         n_iter_no_change=10,
+                         model_args=(X,))
+    evol = np.array([x[1] for x in evol])
+    assert np.any(np.diff(evol) != 0)


def test_regression():
"""Test estimator using a regression"""
@jax.jit
@@ -197,7 +231,7 @@ def modelo(params, X):
learning_rate=1e-1)


-def test_regression():
+def test_regression2():
"""Test regression"""
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
@@ -321,4 +355,4 @@ def modelo(params, X):
return_evolution=True,
n_iter_no_change=2,
deviation=soft_comp_macro_f1)
-    assert len(evolution) and evolution[0][1] > 0.9
+    assert len(evolution) and evolution[0][1] > 0.85