
Fix incompatible types in assignment
Signed-off-by: Daiki Katsuragawa <50144563+daikikatsuragawa@users.noreply.github.com>
daikikatsuragawa committed Nov 16, 2022
1 parent 8f17cfd commit 490b7ab
Showing 8 changed files with 51 additions and 36 deletions.
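All eight files address the same class of mypy diagnostic, `error: Incompatible types in assignment`: mypy pins a variable (or attribute) to the type of its first assignment, so rebinding it to a value of another type is rejected. The hunks below apply a few recurring fixes: initialize the variable with the type it will eventually hold, widen the declared type with an annotation such as `Any` or `Optional[...]`, guard `Optional` values before use, or suppress a single known-safe line with `# type: ignore`. A minimal sketch of the error and the first two fixes (illustrative only, not code from this repository):

    from typing import Any
    import pandas as pd

    # The error: mypy infers `cfs` as a list from the first binding,
    # so rebinding it to a DataFrame is flagged.
    cfs = []
    cfs = pd.DataFrame()  # error: Incompatible types in assignment

    # Fix 1 (dice_KD.py, dice_genetic.py): initialize with the final type.
    cfs_fixed = pd.DataFrame()

    # Fix 2 (dice_genetic.py, dice_pytorch.py): widen the declared type.
    yloss: Any = 0.0      # later rebinding to an ndarray is now accepted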
2 changes: 1 addition & 1 deletion dice_ml/explainer_interfaces/dice_KD.py
@@ -163,7 +163,7 @@ def vary_valid(self, KD_query_instance, total_CFs, features_to_vary, permitted_r

         # TODO: this should be a user-specified parameter
         num_queries = min(len(self.dataset_with_predictions), total_CFs * 10)
-        cfs = []
+        cfs = pd.DataFrame()

         if self.KD_tree is not None and num_queries > 0:
             KD_tree_output = self.KD_tree.query(KD_query_instance, num_queries)
13 changes: 9 additions & 4 deletions dice_ml/explainer_interfaces/dice_genetic.py
@@ -5,6 +5,7 @@
 import copy
 import random
 import timeit
+from typing import Any, List, Union

 import numpy as np
 import pandas as pd
@@ -27,7 +28,7 @@ def __init__(self, data_interface, model_interface):
         self.num_output_nodes = None

         # variables required to generate CFs - see generate_counterfactuals() for more info
-        self.cfs = []
+        self.cfs = pd.DataFrame()
         self.features_to_vary = []
         self.cf_init_weights = []  # total_CFs, algorithm, features_to_vary
         self.loss_weights = []  # yloss_type, diversity_loss_type, feature_weights
@@ -343,12 +344,16 @@ def _predict_fn_custom(self, input_instance, desired_class):

     def compute_yloss(self, cfs, desired_range, desired_class):
         """Computes the first part (y-loss) of the loss function."""
-        yloss = 0.0
+        yloss: Any = 0.0
         if self.model.model_type == ModelTypes.Classifier:
             predicted_value = np.array(self.predict_fn_scores(cfs))
             if self.yloss_type == 'hinge_loss':
                 maxvalue = np.full((len(predicted_value)), -np.inf)
-                for c in range(self.num_output_nodes):
+                if self.num_output_nodes is None:
+                    num_output_nodes = 0
+                else:
+                    num_output_nodes = self.num_output_nodes
+                for c in range(num_output_nodes):
                     if c != desired_class:
                         maxvalue = np.maximum(maxvalue, predicted_value[:, c])
                 yloss = np.maximum(0, maxvalue - predicted_value[:, int(desired_class)])
@@ -429,7 +434,7 @@ def mate(self, k1, k2, features_to_vary, query_instance):
     def find_counterfactuals(self, query_instance, desired_range, desired_class,
                              features_to_vary, maxiterations, thresh, verbose):
         """Finds counterfactuals by generating cfs through the genetic algorithm"""
-        population = self.cfs.copy()
+        population: Any = self.cfs.copy()
         iterations = 0
         previous_best_loss = -np.inf
         current_best_loss = np.inf
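A note on the `compute_yloss` hunk above: `self.num_output_nodes` is initialized to `None` (see the `__init__` hunk), so to mypy `range(self.num_output_nodes)` receives an `Optional[int]` and is rejected. Narrowing the value into a plain `int` local first, as the hunk does, resolves this. An illustrative standalone version of the guard (the function name and fallback are assumptions for the sketch):

    from typing import Optional

    def iterate_outputs(num_output_nodes: Optional[int]) -> None:
        # Narrow Optional[int] to int before handing it to range().
        n = 0 if num_output_nodes is None else num_output_nodes
        for c in range(n):
            print(c)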
9 changes: 6 additions & 3 deletions dice_ml/explainer_interfaces/dice_pytorch.py
@@ -4,6 +4,7 @@
 import copy
 import random
 import timeit
+from typing import Any, Optional, Type, Union

 import numpy as np
 import torch
@@ -223,14 +224,16 @@ def do_optimizer_initializations(self, optimizer, learning_rate):
         opt_method = optimizer.split(':')[1]

         # optimizater initialization
+        self.optimizer: Optional[Union[torch.optim.Adam, torch.optim.RMSprop]] = None
         if opt_method == "adam":
             self.optimizer = torch.optim.Adam(self.cfs, lr=learning_rate)
         elif opt_method == "rmsprop":
             self.optimizer = torch.optim.RMSprop(self.cfs, lr=learning_rate)

     def compute_yloss(self):
         """Computes the first part (y-loss) of the loss function."""
-        yloss = 0.0
+        yloss: Any = 0.0
+        criterion: Optional[Union[torch.nn.BCEWithLogitsLoss, torch.nn.ReLU]] = None
         for i in range(self.total_CFs):
             if self.yloss_type == "l2_loss":
                 temp_loss = torch.pow((self.get_model_output(self.cfs[i]) - self.target_cf_class), 2)[0]
@@ -307,7 +310,7 @@ def compute_diversity_loss(self):
     def compute_regularization_loss(self):
         """Adds a linear equality constraints to the loss functions -
         to ensure all levels of a categorical variable sums to one"""
-        regularization_loss = 0.0
+        regularization_loss: Any = 0.0
         for i in range(self.total_CFs):
             for v in self.encoded_categorical_feature_indexes:
                 regularization_loss += torch.pow((torch.sum(self.cfs[i][v[0]:v[-1]+1]) - 1.0), 2)
@@ -425,7 +428,7 @@ def find_counterfactuals(self, query_instance, desired_class, optimizer, learnin
         test_pred = self.predict_fn(torch.tensor(query_instance).float())[0]
         if desired_class == "opposite":
             desired_class = 1.0 - np.round(test_pred)
-        self.target_cf_class = torch.tensor(desired_class).float()
+        self.target_cf_class: Any = torch.tensor(desired_class).float()

         self.min_iter = min_iter
         self.max_iter = max_iter
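The dice_pytorch.py changes follow one pattern: declare a variable once with the union of every type it may hold (`Optional[Union[...]]`, or `Any` where the value's type varies between bindings), then assign conditionally. Declaring `self.optimizer` as `Optional[Union[torch.optim.Adam, torch.optim.RMSprop]] = None` before the `if`/`elif` gives mypy a single declaration that both branches satisfy. A condensed illustrative sketch (class and method names are placeholders):

    from typing import Optional, Union
    import torch

    class OptimizerHolder:
        def pick(self, params, opt_method: str, lr: float) -> None:
            # One declaration covering both branches and the no-match case.
            self.optimizer: Optional[Union[torch.optim.Adam, torch.optim.RMSprop]] = None
            if opt_method == "adam":
                self.optimizer = torch.optim.Adam(params, lr=lr)
            elif opt_method == "rmsprop":
                self.optimizer = torch.optim.RMSprop(params, lr=lr)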
8 changes: 4 additions & 4 deletions dice_ml/explainer_interfaces/dice_random.py
@@ -5,6 +5,7 @@
 """
 import random
 import timeit
+from typing import List, Optional, Union

 import numpy as np
 import pandas as pd
@@ -30,10 +31,9 @@ def __init__(self, data_interface, model_interface):
         self.model.transformer.initialize_transform_func()

         self.precisions = self.data_interface.get_decimal_precisions(output_type="dict")
-        if self.data_interface.outcome_name in self.precisions:
-            self.outcome_precision = [self.precisions[self.data_interface.outcome_name]]
-        else:
-            self.outcome_precision = 0
+        self.outcome_precision = [
+            self.precisions[self.data_interface.outcome_name]
+        ] if self.data_interface.outcome_name in self.precisions else 0

     def _generate_counterfactuals(self, query_instance, total_CFs, desired_range=None,
                                   desired_class="opposite", permitted_range=None,
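A note on this hunk: in the removed `if`/`else` form, mypy types `self.outcome_precision` from the first branch (a list) and then flags the `0` in the `else` branch. The conditional-expression form gives mypy a single assignment site from which it infers the union of both arms. An illustrative sketch with a toy `precisions` dict (the names are placeholders):

    precisions = {"income": 2}
    outcome_name = "income"

    # Single assignment site: mypy infers Union[List[int], int].
    outcome_precision = [precisions[outcome_name]] if outcome_name in precisions else 0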
3 changes: 1 addition & 2 deletions dice_ml/explainer_interfaces/dice_tensorflow2.py
@@ -341,8 +341,7 @@ def initialize_CFs(self, query_instance, init_near_query_instance=False):
                     one_init.append(np.random.uniform(self.minx[0][i], self.maxx[0][i]))
                 else:
                     one_init.append(query_instance[0][i])
-            one_init = np.array([one_init], dtype=np.float32)
-            self.cfs[n].assign(one_init)
+            self.cfs[n].assign(np.array([one_init], dtype=np.float32))

     def round_off_cfs(self, assign=False):
         """function for intermediate projection of CFs."""
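The dice_tensorflow2.py fix avoids a rebinding rather than annotating it: `one_init` is built up as a Python list, so reassigning the same name to an `np.array(...)` result was flagged; passing the array straight into `assign()` sidesteps the second binding. An illustrative sketch:

    import numpy as np

    one_init = [0.1, 0.2, 0.3]                    # inferred as a list of floats
    # one_init = np.array([one_init])             # rebinding -> incompatible assignment
    arr = np.array([one_init], dtype=np.float32)  # a fresh name keeps both types stable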
39 changes: 23 additions & 16 deletions dice_ml/explainer_interfaces/explainer_base.py
@@ -5,6 +5,7 @@
 import pickle
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
+from typing import Any, Dict, List, Optional, Union

 import numpy as np
 import pandas as pd
@@ -152,10 +153,9 @@ def generate_counterfactuals(self, query_instances, total_CFs,
         cf_examples_arr = []
         query_instances_list = []
         if isinstance(query_instances, pd.DataFrame):
-            for ix in range(query_instances.shape[0]):
-                query_instances_list.append(query_instances[ix:(ix+1)])
+            query_instances_list = [query_instances[ix:(ix+1)] for ix in range(query_instances.shape[0])]
         elif isinstance(query_instances, Iterable):
-            query_instances_list = query_instances
+            query_instances_list = [query_instance for query_instance in query_instances]
         for query_instance in tqdm(query_instances_list):
             self.data_interface.set_continuous_feature_indexes(query_instance)
             res = self._generate_counterfactuals(
@@ -416,7 +416,7 @@ def feature_importance(self, query_instances, cf_examples_list=None,
                                                  posthoc_sparsity_algorithm=posthoc_sparsity_algorithm,
                                                  **kwargs).cf_examples_list
         allcols = self.data_interface.categorical_feature_names + self.data_interface.continuous_feature_names
-        summary_importance = None
+        summary_importance: Optional[Union[Dict[int, float]]] = None
         local_importances = None
         if global_importance:
             summary_importance = {}
@@ -532,7 +532,7 @@ def do_posthoc_sparsity_enhancement(self, final_cfs_sparse, query_instance, post
             for feature in features_sorted:
                 # current_pred = self.predict_fn_for_sparsity(final_cfs_sparse.iat[[cf_ix]][self.data_interface.feature_names])
                 # feat_ix = self.data_interface.continuous_feature_names.index(feature)
-                diff = query_instance[feature].iat[0] - final_cfs_sparse.at[cf_ix, feature]
+                diff = query_instance[feature].iat[0] - int(final_cfs_sparse.at[cf_ix, feature])
                 if(abs(diff) <= quantiles[feature]):
                     if posthoc_sparsity_algorithm == "linear":
                         final_cfs_sparse = self.do_linear_search(diff, decimal_prec, query_instance, cf_ix,
@@ -561,16 +561,17 @@ def do_linear_search(self, diff, decimal_prec, query_instance, cf_ix, feature, f
         while((abs(diff) > 10e-4) and (np.sign(diff*old_diff) > 0) and
               self.is_cf_valid(current_pred)) and (count_steps < limit_steps_ls):

-            old_val = final_cfs_sparse.at[cf_ix, feature]
+            old_val = int(final_cfs_sparse.at[cf_ix, feature])
             final_cfs_sparse.at[cf_ix, feature] += np.sign(diff)*change
             current_pred = self.predict_fn_for_sparsity(final_cfs_sparse.loc[[cf_ix]][self.data_interface.feature_names])
             old_diff = diff

             if not self.is_cf_valid(current_pred):
                 final_cfs_sparse.at[cf_ix, feature] = old_val
+                diff = query_instance[feature].iat[0] - int(final_cfs_sparse.at[cf_ix, feature])
                 return final_cfs_sparse

-            diff = query_instance[feature].iat[0] - final_cfs_sparse.at[cf_ix, feature]
+            diff = query_instance[feature].iat[0] - int(final_cfs_sparse.at[cf_ix, feature])

             count_steps += 1

@@ -580,7 +581,7 @@ def do_binary_search(self, diff, decimal_prec, query_instance, cf_ix, feature, f
         """Performs a binary search between continuous features of a CF and corresponding values
         in query_instance until the prediction class changes."""

-        old_val = final_cfs_sparse.at[cf_ix, feature]
+        old_val = int(final_cfs_sparse.at[cf_ix, feature])
         final_cfs_sparse.at[cf_ix, feature] = query_instance[feature].iat[0]
         # Prediction of the query instance
         current_pred = self.predict_fn_for_sparsity(final_cfs_sparse.loc[[cf_ix]][self.data_interface.feature_names])
@@ -593,7 +594,7 @@ def do_binary_search(self, diff, decimal_prec, query_instance, cf_ix, feature, f

         # move the CF values towards the query_instance
         if diff > 0:
-            left = final_cfs_sparse.at[cf_ix, feature]
+            left = int(final_cfs_sparse.at[cf_ix, feature])
             right = query_instance[feature].iat[0]

             while left <= right:
@@ -613,7 +614,7 @@ def do_binary_search(self, diff, decimal_prec, query_instance, cf_ix, feature, f

         else:
             left = query_instance[feature].iat[0]
-            right = final_cfs_sparse.at[cf_ix, feature]
+            right = int(final_cfs_sparse.at[cf_ix, feature])

             while right >= left:
                 current_val = right - ((right - left)/2)
@@ -731,13 +732,16 @@ def is_cf_valid(self, model_score):
             model_score = model_score[0]
         # Converting target_cf_class to a scalar (tf/torch have it as (1,1) shape)
         if self.model.model_type == ModelTypes.Classifier:
-            target_cf_class = self.target_cf_class
             if hasattr(self.target_cf_class, "shape"):
                 if len(self.target_cf_class.shape) == 1:
-                    target_cf_class = self.target_cf_class[0]
+                    temp_target_cf_class = self.target_cf_class[0]
                 elif len(self.target_cf_class.shape) == 2:
-                    target_cf_class = self.target_cf_class[0][0]
-            target_cf_class = int(target_cf_class)
+                    temp_target_cf_class = self.target_cf_class[0][0]
+                else:
+                    temp_target_cf_class = int(self.target_cf_class)
+            else:
+                temp_target_cf_class = int(self.target_cf_class)
+            target_cf_class = temp_target_cf_class

             if len(model_score) == 1:  # for tensorflow/pytorch models
                 pred_1 = model_score[0]
@@ -757,6 +761,7 @@ def is_cf_valid(self, model_score):
             return self.target_cf_range[0] <= model_score and model_score <= self.target_cf_range[1]

     def get_model_output_from_scores(self, model_scores):
+        output_type: Any = None
         if self.model.model_type == ModelTypes.Classifier:
             output_type = np.int32
         else:
@@ -806,17 +811,19 @@ def build_KD_tree(self, data_df_copy, desired_range, desired_class, predicted_ou
         data_df_copy[predicted_outcome_name] = predictions

         # segmenting the dataset according to outcome
-        dataset_with_predictions = None
         if self.model.model_type == ModelTypes.Classifier:
             dataset_with_predictions = data_df_copy.loc[[i == desired_class for i in predictions]].copy()

         elif self.model.model_type == ModelTypes.Regressor:
             dataset_with_predictions = data_df_copy.loc[
                 [desired_range[0] <= pred <= desired_range[1] for pred in predictions]].copy()

+        else:
+            dataset_with_predictions = None
+
         KD_tree = None
         # Prepares the KD trees for DiCE
-        if len(dataset_with_predictions) > 0:
+        if dataset_with_predictions is not None and len(dataset_with_predictions) > 0:
             dummies = pd.get_dummies(dataset_with_predictions[self.data_interface.feature_names])
             KD_tree = KDTree(dummies)
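Two patterns recur in the explainer_base.py hunks: replacing incremental rebindings with single-site assignments (the list comprehensions in `generate_counterfactuals`, the `temp_target_cf_class` local in `is_cf_valid`), and guarding `Optional` values before use. The `build_KD_tree` change is the latter: `dataset_with_predictions` can be `None` for an unrecognized model type, so the `None` check must precede `len()`. An illustrative distillation (the function name is a placeholder):

    from typing import Optional
    import pandas as pd

    def kd_tree_input(df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
        # Check for None first so mypy accepts the len() call on df.
        if df is not None and len(df) > 0:
            return pd.get_dummies(df)
        return None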
8 changes: 4 additions & 4 deletions dice_ml/explainer_interfaces/feasible_base_vae.py
@@ -136,8 +136,9 @@ def train(self, pre_trained=False):
             train_loss = 0.0
             train_size = 0

-            train_dataset = torch.tensor(self.vae_train_feat).float()
-            train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
+            train_dataset = torch.utils.data.DataLoader(
+                torch.tensor(self.vae_train_feat).float(),  # type: ignore
+                batch_size=self.batch_size, shuffle=True)
             for train in enumerate(train_dataset):
                 self.cf_vae_optimizer.zero_grad()

@@ -178,8 +179,7 @@ def generate_counterfactuals(self, query_instance, total_CFs, desired_class="opp
         final_cf_pred = []
         final_test_pred = []
         for i in range(len(query_instance)):
-            train_x = test_dataset[i]
-            train_x = torch.tensor(train_x).float()
+            train_x = torch.tensor(test_dataset[i]).float()
             train_y = torch.argmax(self.pred_model(train_x), dim=1)

             curr_gen_cf = []
5 changes: 3 additions & 2 deletions dice_ml/explainer_interfaces/feasible_model_approx.py
@@ -81,8 +81,9 @@ def train(self, constraint_type, constraint_variables, constraint_direction, con
             train_loss = 0.0
             train_size = 0

-            train_dataset = torch.tensor(self.vae_train_feat).float()
-            train_dataset = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
+            train_dataset = torch.utils.data.DataLoader(
+                torch.tensor(self.vae_train_feat).float(),  # type: ignore
+                batch_size=self.batch_size, shuffle=True)
             for train in enumerate(train_dataset):
                 self.cf_vae_optimizer.zero_grad()

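In both VAE trainers the original code bound `train_dataset` first to a tensor and then to a `DataLoader`, which is exactly the rebinding mypy rejects; the fix inlines the tensor into the `DataLoader(...)` call. The `# type: ignore` stays, presumably because a raw tensor works with `DataLoader` at runtime (it supports `__len__` and `__getitem__`) but does not match the stubs' declared `Dataset` parameter. An illustrative alternative, not what this commit does, that satisfies the stubs without a suppression:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    features = torch.randn(8, 3)  # stand-in for self.vae_train_feat
    # Wrapping the tensor in a TensorDataset matches DataLoader's declared
    # Dataset parameter, at the cost of batches arriving as 1-tuples.
    loader = DataLoader(TensorDataset(features), batch_size=4, shuffle=True)
    for i, (batch,) in enumerate(loader):
        print(i, batch.shape)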
