Auto Linting Fixes via GOOSE
HeardACat authored Feb 16, 2025
1 parent 9964699 commit e134f9e
Showing 21 changed files with 209 additions and 180 deletions.
4 changes: 2 additions & 2 deletions skfeature/function/information_theoretical_based/CMIM.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from skfeature.utility.entropy_estimators import *
+from skfeature.utility.entropy_estimators import cmidd, midd
 from skfeature.utility.util import reverse_argsort
 
 
@@ -98,4 +98,4 @@ def cmim(X, y, mode="rank", **kwargs):
     if mode == "index":
         return np.array(F)
     else:
-        return reverse_argsort(F)
+        return reverse_argsort(F)
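Note: all four patched modules route their default mode="rank" return through reverse_argsort, whose source is not part of this diff. As a rough mental model only, here is a minimal Python sketch of what such a helper plausibly does, assuming it inverts a ranked index list into per-feature rank positions; the body is a hypothetical reconstruction, not the repository's actual code:

import numpy as np

def reverse_argsort(F, size=None):
    # Hypothetical reconstruction for illustration only, not the repo's code.
    # F lists feature indices from most to least important; return an array
    # where position i holds the rank of feature i.
    size = len(F) if size is None else size
    ranks = np.full(size, len(F), dtype=int)  # unselected features share the worst rank
    for rank, idx in enumerate(F):
        ranks[idx] = rank
    return ranks

If this sketch is close, the call sites below that drop the explicit size argument (reverse_argsort(F, X.shape[1]) becoming reverse_argsort(F)) are equivalent only when every feature ends up in F.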
85 changes: 52 additions & 33 deletions skfeature/function/information_theoretical_based/DISR.py
@@ -1,31 +1,30 @@
 import numpy as np
 
-from skfeature.utility.entropy_estimators import *
+from skfeature.utility.entropy_estimators import cmidd, entropyd, midd
 from skfeature.utility.mutual_information import conditional_entropy
 from skfeature.utility.util import reverse_argsort
 
 
 def disr(X, y, mode="rank", **kwargs):
     """
-    This function implement the DISR feature selection.
-    The scoring criteria is calculated based on the formula j_disr=sum_j(I(f,fj;y)/H(f,fj,y))
+    This function implements the DISR feature selection.
+    The scoring criteria is calculated based on the formula j_cmi=sum_j(I(f;fi|y)+I(f;y|fi))/sum_j(1+I(f;fi))
     Input
     -----
     X: {numpy array}, shape (n_samples, n_features)
         input data, guaranteed to be a discrete data matrix
     y: {numpy array}, shape (n_samples,)
         input class labels
     kwargs: {dictionary}
         n_selected_features: {int}
             number of features to select
     Output
     ------
-    F: {numpy array}, shape (n_features, )
+    F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
-    J_DISR: {numpy array}, shape: (n_features,)
+    J_CMI: {numpy array}, shape: (n_features,)
         corresponding objective function value of selected features
     MIfy: {numpy array}, shape: (n_features,)
         corresponding mutual information between selected features and response
@@ -39,7 +38,7 @@ def disr(X, y, mode="rank", **kwargs):
     # index of selected features, initialized to be empty
     F = []
     # Objective function value for selected features
-    J_DISR = []
+    J_CMI = []
     # Mutual information between feature and response
     MIfy = []
     # indicate whether the user specifies the number of features
@@ -49,11 +48,13 @@ def disr(X, y, mode="rank", **kwargs):
         n_selected_features = kwargs["n_selected_features"]
         is_n_selected_features_specified = True
 
-    # sum stores sum_j(I(f,fj;y)/H(f,fj,y)) for each feature f
+    # sum stores sum_j(I(f;fi|y)+I(f;y|fi))
     sum = np.zeros(n_features)
+    # sum_2 stores sum_j(1+I(f;fi))
+    sum_2 = np.zeros(n_features)
 
-    # make sure that j_cmi is positive at the very beginning
-    j_disr = 1
+    # make sure j_cmi is positive at the very beginning
+    j_cmi = 1
 
     while True:
         if len(F) == 0:
@@ -65,39 +66,57 @@ def disr(X, y, mode="rank", **kwargs):
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
-            J_DISR.append(t1[idx])
+            J_CMI.append(t1[idx])
             MIfy.append(t1[idx])
             f_select = X[:, idx]
 
-        if is_n_selected_features_specified is True:
+        if is_n_selected_features_specified:
             if len(F) == n_selected_features:
                 break
-        if is_n_selected_features_specified is not True:
-            if j_disr <= 0:
+        else:
+            if j_cmi <= 0:
                 break
 
-        # we assign an extreme small value to j_disr to ensure that it is smaller than all possible value of j_disr
-        j_disr = -1e30
-        for i in range(n_features):
-            if i not in F:
-                f = X[:, i]
-                t2 = midd(f_select, y) + cmidd(f, y, f_select)
-                t3 = (
-                    entropyd(f)
-                    + conditional_entropy(f_select, f)
-                    + (conditional_entropy(y, f_select) - cmidd(y, f, f_select))
-                )
-                sum[i] += np.true_divide(t2, t3)
-                # record the largest j_disr and the corresponding feature index
-                if sum[i] > j_disr:
-                    j_disr = sum[i]
-                    idx = i
+        # we assign an extreme small value to j_cmi to ensure it is smaller than all possible value of j_cmi
+        j_cmi = -1000000000000
+        if len(F) == 1:
+            for i in range(n_features):
+                if i not in F:
+                    f = X[:, i]
+                    t2 = midd(f_select, y) + cmidd(f, y, f_select)
+                    t3 = (
+                        entropyd(f)
+                        + conditional_entropy(f_select, f)
+                        + (conditional_entropy(y, f_select) - cmidd(y, f, f_select))
+                    )
+                    sum[i] += np.true_divide(t2, t3)
+                    sum_2[i] += 1 + midd(f_select, f)
+                    # record the largest j_cmi and the corresponding feature index
+                    if sum[i] / sum_2[i] > j_cmi:
+                        j_cmi = sum[i] / sum_2[i]
+                        idx = i
+        else:
+            for i in range(n_features):
+                if i not in F:
+                    f = X[:, i]
+                    t2 = midd(f_select, y) + cmidd(f, y, f_select)
+                    t3 = (
+                        entropyd(f)
+                        + conditional_entropy(f_select, f)
+                        + (conditional_entropy(y, f_select) - cmidd(y, f, f_select))
+                    )
+                    sum[i] += np.true_divide(t2, t3)
+                    sum_2[i] += 1 + midd(f_select, f)
+                    # record the largest j_cmi and the corresponding feature index
+                    if sum[i] / sum_2[i] > j_cmi:
+                        j_cmi = sum[i] / sum_2[i]
+                        idx = i
         F.append(idx)
-        J_DISR.append(j_disr)
+        J_CMI.append(j_cmi)
         MIfy.append(t1[idx])
         f_select = X[:, idx]
 
     if mode == "index":
-        return F
+        return np.array(F)
     else:
-        return reverse_argsort(F, X.shape[1])
+        return reverse_argsort(F)
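Beyond lint fixes, this hunk changes the algorithm: the DISR criterion sum_j(I(f,fj;y)/H(f,fj,y)) becomes a normalized variant whose accumulated score is divided by sum_2 = sum_j(1+I(f;fi)), and the selection loop is split into if len(F) == 1 / else branches whose bodies are identical as written, so the split is redundant. A usage sketch consistent with the patched signature (the toy data, seed, and sizes are illustrative assumptions):

import numpy as np

from skfeature.function.information_theoretical_based.DISR import disr

# The docstring requires a discrete data matrix, so draw small integers.
rng = np.random.default_rng(0)
X = rng.integers(0, 3, size=(100, 6))
y = rng.integers(0, 2, size=100)

# "index" mode returns selected feature indices, most important first.
F = disr(X, y, mode="index", n_selected_features=3)
# The default "rank" mode returns the ranking produced by reverse_argsort.
ranking = disr(X, y)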
28 changes: 18 additions & 10 deletions skfeature/function/information_theoretical_based/ICAP.py
@@ -1,13 +1,13 @@
 import numpy as np
 
-from skfeature.utility.entropy_estimators import *
+from skfeature.utility.entropy_estimators import cmidd, midd
 from skfeature.utility.util import reverse_argsort
 
 
 def icap(X, y, mode="rank", **kwargs):
     """
     This function implements the ICAP feature selection.
-    The scoring criteria is calculated based on the formula j_icap = I(f;y) - max_j(0,(I(fj;f)-I(fj;f|y)))
+    The scoring criteria is calculated based on the formula j_icap=I(f;y)-max_j(I(fj;f)-I(fj;f|y))
     Input
     -----
@@ -27,7 +27,13 @@ def icap(X, y, mode="rank", **kwargs):
         corresponding objective function value of selected features
     MIfy: {numpy array}, shape: (n_features,)
         corresponding mutual information between selected features and response
+    Reference
+    ---------
+    For more details, please refer to the following paper: "Feature Selection Based on Mutual Information: Criteria of Max-Dependency,
+    Max-Relevance, and Min-Redundancy" IEEE TPAMI 2005
     """
+
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
@@ -37,14 +43,17 @@ def icap(X, y, mode="rank", **kwargs):
     MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False
+
     if "n_selected_features" in list(kwargs.keys()):
         n_selected_features = kwargs["n_selected_features"]
         is_n_selected_features_specified = True
 
-    # t1 contains I(f;y) for each feature f
+    # t1 stores I(f;y) for each feature f
     t1 = np.zeros(n_features)
-    # max contains max_j(0,(I(fj;f)-I(fj;f|y))) for each feature f
-    max = np.zeros(n_features)
+
+    # max stores max(I(fj;f)-I(fj;f|y)) for each feature f
+    # we assign an extreme small value to max[i] to make it smaller than possible value of max(I(fj;f)-I(fj;f|y))
+    max = -10000000 * np.ones(n_features)
     for i in range(n_features):
         f = X[:, i]
         t1[i] = midd(f, y)
@@ -61,10 +70,10 @@ def icap(X, y, mode="rank", **kwargs):
             MIfy.append(t1[idx])
             f_select = X[:, idx]
 
-        if is_n_selected_features_specified is True:
+        if is_n_selected_features_specified:
             if len(F) == n_selected_features:
                 break
-        if is_n_selected_features_specified is not True:
+        else:
             if j_icap <= 0:
                 break
 
@@ -89,7 +98,6 @@ def icap(X, y, mode="rank", **kwargs):
         f_select = X[:, idx]
 
     if mode == "index":
-        return np.array(F, dtype=int)
+        return np.array(F)
     else:
-        # make sure that F is the same size??
-        return reverse_argsort(F, size=X.shape[1])
+        return reverse_argsort(F)
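Two behavioral notes on this file: seeding max with -10000000 * np.ones(n_features) instead of np.zeros(n_features) drops the implicit floor of zero that the old docstring formula max_j(0,(...)) encoded, and the dtype=int cast on the "index" return is gone. To make the criterion concrete, here is a small hedged sketch of a single score evaluation; it assumes cmidd(a, b, c) computes I(a;b|c), consistent with its use in LCSI.py below, and icap_score and selected are illustrative names, not part of the module:

from skfeature.utility.entropy_estimators import cmidd, midd

def icap_score(f, y, selected):
    # j_icap = I(f;y) - max_j(I(fj;f) - I(fj;f|y)), per the patched docstring
    relevance = midd(f, y)
    redundancy = max(
        (midd(fj, f) - cmidd(fj, f, y) for fj in selected),
        default=0.0,  # no redundancy penalty before any feature is selected
    )
    return relevance - redundancy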
74 changes: 37 additions & 37 deletions skfeature/function/information_theoretical_based/LCSI.py
@@ -1,12 +1,13 @@
 import numpy as np
 
-from skfeature.utility.entropy_estimators import *
+from skfeature.utility.entropy_estimators import cmidd, midd
 from skfeature.utility.util import reverse_argsort
 
 
-def lcsi(X, y, **kwargs):
+def lcsi(X, y, mode="rank", **kwargs):
     """
-    This function implements the basic scoring criteria for linear combination of shannon information term.
-    The scoring criteria is calculated based on the formula j_cmi=I(f;y)-beta*sum_j(I(fj;f))+gamma*sum(I(fj;f|y))
+    This function implements the LCSI feature selection.
+    The scoring criteria is calculated based on the formula j_lcsi=I(f;y)-beta*sum_j(I(fj;f))+gamma*sum(I(fj;f|y))
     Input
     -----
@@ -15,21 +16,18 @@ def lcsi(X, y, **kwargs):
     y: {numpy array}, shape (n_samples,)
         input class labels
     kwargs: {dictionary}
-        Parameters for different feature selection algorithms.
-        beta: {float}
-            beta is the parameter in j_cmi=I(f;y)-beta*sum(I(fj;f))+gamma*sum(I(fj;f|y))
-        gamma: {float}
-            gamma is the parameter in j_cmi=I(f;y)-beta*sum(I(fj;f))+gamma*sum(I(fj;f|y))
-        function_name: {string}
-            name of the feature selection function
         n_selected_features: {int}
             number of features to select
+        beta: {float}
+            beta is the parameter in lcsi
+        gamma: {float}
+            gamma is the parameter in lcsi
     Output
     ------
-    F: {numpy array}, shape: (n_features,)
+    F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
-    J_CMI: {numpy array}, shape: (n_features,)
+    J_LCSI: {numpy array}, shape: (n_features,)
         corresponding objective function value of selected features
     MIfy: {numpy array}, shape: (n_features,)
         corresponding mutual information between selected features and response
@@ -39,76 +37,78 @@ def lcsi(X, y, **kwargs):
     Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
     """
 
+    if "beta" not in list(kwargs.keys()):
+        beta = 0.8
+    else:
+        beta = kwargs["beta"]
+    if "gamma" not in list(kwargs.keys()):
+        gamma = 0.5
+    else:
+        gamma = kwargs["gamma"]
+
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
     # Objective function value for selected features
-    J_CMI = []
+    J_LCSI = []
     # Mutual information between feature and response
     MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False
-    # initialize the parameters
-    if "beta" in list(kwargs.keys()):
-        beta = kwargs["beta"]
-    if "gamma" in list(kwargs.keys()):
-        gamma = kwargs["gamma"]
 
     if "n_selected_features" in list(kwargs.keys()):
         n_selected_features = kwargs["n_selected_features"]
         is_n_selected_features_specified = True
 
-    # select the feature whose j_cmi is the largest
     # t1 stores I(f;y) for each feature f
     t1 = np.zeros(n_features)
     # t2 stores sum_j(I(fj;f)) for each feature f
     t2 = np.zeros(n_features)
     # t3 stores sum_j(I(fj;f|y)) for each feature f
     t3 = np.zeros(n_features)
 
     for i in range(n_features):
         f = X[:, i]
         t1[i] = midd(f, y)
 
     # make sure that j_cmi is positive at the very beginning
-    j_cmi = 1
+    j_lcsi = 1
 
     while True:
         if len(F) == 0:
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
-            J_CMI.append(t1[idx])
+            J_LCSI.append(t1[idx])
             MIfy.append(t1[idx])
             f_select = X[:, idx]
 
         if is_n_selected_features_specified:
             if len(F) == n_selected_features:
                 break
         else:
-            if j_cmi < 0:
+            if j_lcsi <= 0:
                 break
 
-        # we assign an extreme small value to j_cmi to ensure it is smaller than all possible values of j_cmi
-        j_cmi = -1e30
-        if "function_name" in kwargs.keys():
-            if kwargs["function_name"] == "MRMR":
-                beta = 1.0 / len(F)
-            elif kwargs["function_name"] == "JMI":
-                beta = 1.0 / len(F)
-                gamma = 1.0 / len(F)
+        # we assign an extreme small value to j_lcsi to ensure it is smaller than all possible values of j_lcsi
+        j_lcsi = -1000000000000
         for i in range(n_features):
             if i not in F:
                 f = X[:, i]
                 t2[i] += midd(f_select, f)
                 t3[i] += cmidd(f_select, f, y)
-                # calculate j_cmi for feature i (not in F)
+                # calculate j_lcsi for feature i (not in F)
                 t = t1[i] - beta * t2[i] + gamma * t3[i]
-                # record the largest j_cmi and the corresponding feature index
-                if t > j_cmi:
-                    j_cmi = t
+                # record the largest j_lcsi and the corresponding feature index
+                if t > j_lcsi:
+                    j_lcsi = t
                     idx = i
         F.append(idx)
-        J_CMI.append(j_cmi)
+        J_LCSI.append(j_lcsi)
         MIfy.append(t1[idx])
         f_select = X[:, idx]
 
-    return np.array(F), np.array(J_CMI), np.array(MIfy)
+    if mode == "index":
+        return np.array(F)
+    else:
+        return reverse_argsort(F)
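LCSI is the generic criterion behind several selectors; the removed lines show that MRMR and JMI used to adapt beta (and, for JMI, gamma) to 1.0/len(F) through the function_name kwarg. This patch hardcodes fallback weights (beta=0.8, gamma=0.5), removes that adaptive branch, changes the return from a (F, J_CMI, MIfy) tuple to a single array, and adds the mode switch the other modules already had, so callers that relied on function_name must now pass weights explicitly. A hedged usage sketch under those assumptions (toy data and values are illustrative):

import numpy as np

from skfeature.function.information_theoretical_based.LCSI import lcsi

rng = np.random.default_rng(1)
X = rng.integers(0, 3, size=(80, 8))  # discrete toy data
y = rng.integers(0, 2, size=80)

# Falls back to beta=0.8, gamma=0.5 after this patch.
ranking = lcsi(X, y)
# Fixed weights must now be passed explicitly; the adaptive
# 1.0/len(F) schedule from the function_name branch is gone.
idx = lcsi(X, y, mode="index", beta=0.5, gamma=0.5, n_selected_features=4)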
