-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathControlMetrics.py
225 lines (181 loc) · 8.61 KB
/
ControlMetrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import pandas as pd
import numpy as np
from scipy.linalg import svdvals, schur
from scipy import stats
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt
from statannotations.Annotator import Annotator
def Normalization(A):
    """
    Scale an adjacency matrix by one plus its largest singular value.

    Parameters:
    A (numpy.ndarray): Adjacency matrix to be normalized.

    Returns:
    numpy.ndarray: ``A / (1 + s_max)``, where ``s_max`` is the largest
    singular value of ``A``.
    """
    singular_values = svdvals(A)
    scale = 1 + max(singular_values)
    return A / scale
def AverageControl(A):
    """
    Calculate Average Controllability for each node in a network.

    The matrix is rescaled with ``Normalization`` inside this function and
    then decomposed with a real Schur decomposition ``T, U``. The value for
    node ``k`` is ``sum_i U[k, i]**2 / (1 - T[i, i]**2)``.

    Parameters:
    A (numpy.ndarray): Adjacency matrix of the network. It is normalized
        here, so callers pass the matrix as-is.

    Returns:
    numpy.ndarray: A vector of average controllability values, one per node.
    """
    scaled = Normalization(A)
    schur_form, schur_vectors = schur(scaled, output='real')
    # One denominator 1 - t_ii^2 per diagonal entry of the Schur form.
    denominators = 1 - np.square(np.diag(schur_form))
    # Row i holds the squared entries of Schur vector i divided by the
    # denominator that belongs to diagonal entry i.
    weighted = np.square(schur_vectors.T) / denominators[:, np.newaxis]
    # Collapsing the rows leaves one accumulated value per node (column).
    return np.sum(weighted, axis=0)
def ModalControl(A):
    """
    Calculate Modal Controllability for each node in a network.

    The matrix is rescaled with ``Normalization`` inside this function and
    then decomposed with a real Schur decomposition ``T, U``. The value for
    node ``i`` is ``sum_j U[i, j]**2 * (1 - T[j, j]**2)``.

    Parameters:
    A (numpy.ndarray): Adjacency matrix of the network. It is normalized
        here, so callers pass the matrix as-is.

    Returns:
    numpy.ndarray: A vector of modal controllability values, one per node.
    """
    scaled = Normalization(A)
    schur_form, schur_vectors = schur(scaled, output='real')
    # Weight 1 - t_jj^2 for every diagonal entry of the Schur form.
    mode_weights = 1 - np.diag(schur_form)**2
    # One weighted sum per row of U, i.e. per node.
    per_node = (np.sum(row**2 * mode_weights) for row in schur_vectors)
    return np.fromiter(per_node, dtype=float, count=scaled.shape[0])
def control_metrics(df, col_name):
    """
    Calculate control metrics for a DataFrame containing adjacency matrices.

    The input DataFrame is modified in place (four columns are added) and
    also returned.

    Parameters:
    df (pandas.DataFrame): DataFrame containing the data.
    col_name (str): Column name in df containing adjacency matrices
        (one matrix per row).

    Returns:
    pandas.DataFrame: The same DataFrame with these additional columns:
        'A_Norm' (output of Normalization), 'Average' (output of
        AverageControl), 'Modal' (output of ModalControl) and
        'TimeConstant' (main diagonal of the normalized matrix).
    """
    # Normalization
    df['A_Norm'] = df[col_name].apply(Normalization)

    # Calculating control metrics.
    # AverageControl and ModalControl normalize their input themselves, so
    # they are fed the raw matrices rather than 'A_Norm'.
    # (The previous PyC_* names are not defined in this module and raised
    # NameError.)
    df['Average'] = df[col_name].apply(AverageControl)
    df['Modal'] = df[col_name].apply(ModalControl)

    # Main diagonal of each normalized matrix.
    df['TimeConstant'] = df['A_Norm'].apply(lambda x: np.diag(x))
    return df
# Example usage:
# df = pd.read_csv('your_data.csv') # Load your data
# df = control_metrics(df, 'A_matrice')
# print(df.head())
def melted(df, col_name, index, group_column='Group', entity_ids='user_id'):
    """
    Transforms a DataFrame by melting it based on a specified column.

    Each entry of ``df[col_name]`` is a sequence of values; the sequences are
    expanded into one column per name in ``index`` and then melted into long
    format.

    Parameters:
    df (pandas.DataFrame): The DataFrame to be transformed.
    col_name (str): The name of the column to be melted. Every entry must
        hold as many values as there are names in ``index``.
    index (list): The new column names for the expanded values; they become
        the entries of the 'Index' column in the result.
    group_column (str, optional): The name of the column representing groups. Defaults to 'Group'.
    entity_ids (str, optional): The name of the column representing entity IDs. Defaults to 'user_id'.
        The column must exist in ``df``; it is not part of the melted
        output because only ``group_column`` is kept as an identifier.

    Returns:
    pandas.DataFrame: A melted DataFrame with the columns ``group_column``,
        'Index' and 'value'.
    """
    # Expand the per-row sequences into one column per name in `index`.
    # Reusing df's own index matters: pd.concat(axis=1) aligns rows by index
    # label, so a fresh 0..n-1 index would mismatch (and inject NaN rows)
    # whenever df carries a non-default index, e.g. after filtering.
    df_values = pd.DataFrame(df[col_name].tolist(), columns=index, index=df.index)

    # Concatenate with the 'group_column' and 'entity_ids' columns from the original DataFrame
    df_control = pd.concat([df[[group_column, entity_ids]], df_values], axis=1)

    # Melt the DataFrame
    return pd.melt(df_control, id_vars=[group_column], value_vars=index, var_name='Index')
def stat_tests(df_melted, value='value', group_column='Group'):
    """
    Perform statistical tests on the provided DataFrame.

    For every unique entry of the 'Index' column the two groups are compared:
    a Shapiro-Wilk test is run on each group, an independent t-test is used
    when both groups pass it (p > 0.05), otherwise a two-sided Mann-Whitney U
    test. The p-values are then Bonferroni-corrected across all entries.
    Missing values (NaN) are removed from each group before any test is run.

    Parameters:
    df_melted (pandas.DataFrame): DataFrame containing the data for testing.
        Must contain an 'Index' column plus the ``value`` and
        ``group_column`` columns.
    value (str, optional): Column name of the values to be tested. Defaults to 'value'.
    group_column (str, optional): Column name containing group names. Defaults to 'Group'.

    Returns:
    pandas.DataFrame: DataFrame containing the results of the statistical tests,
        indexed by the unique 'Index' entries, with the columns 'g1_Normal',
        'g2_Normal', 'Test_Stat', 'P_Value', 'Test_Type', 'Adjusted_P_Value'
        and 'Is_Significant' (adjusted p-value < 0.05).

    Raises:
    ValueError: If ``group_column`` does not contain exactly two unique groups.
    """
    index = df_melted['Index'].unique()
    groups = df_melted[group_column].unique()
    if len(groups) != 2:
        raise ValueError("The DataFrame must contain exactly two unique groups for comparison.")
    group1, group2 = groups

    normality_results = pd.DataFrame(index=index, columns=['g1_Normal', 'g2_Normal'])
    test_results = pd.DataFrame(index=index, columns=['Test_Stat', 'P_Value', 'Test_Type'])

    for idx in index:
        group_data = df_melted[df_melted['Index'] == idx]
        # Drop NaNs once, up front. Previously only the t-test omitted them,
        # but the Shapiro-Wilk and Mann-Whitney calls still received them, so
        # a single NaN ended in a NaN p-value instead of being ignored.
        group_1 = group_data.loc[group_data[group_column] == group1, value].dropna()
        group_2 = group_data.loc[group_data[group_column] == group2, value].dropna()

        # Shapiro-Wilk test for normality
        g1_normal = stats.shapiro(group_1).pvalue > 0.05
        g2_normal = stats.shapiro(group_2).pvalue > 0.05
        normality_results.loc[idx, 'g1_Normal'] = g1_normal
        normality_results.loc[idx, 'g2_Normal'] = g2_normal

        # Choose statistical test based on normality results
        if g1_normal and g2_normal:
            # T-test for normally distributed groups
            stat, p = stats.ttest_ind(group_1, group_2)
            test_type = 't-test'
        else:
            # Mann-Whitney U test for non-normally distributed groups
            stat, p = stats.mannwhitneyu(group_1, group_2, alternative='two-sided')
            test_type = 'Mann-Whitney'
        test_results.loc[idx] = [stat, p, test_type]

    # Bonferroni correction for multiple testing. The column was filled
    # row by row into an object-dtype frame, so hand over real floats.
    raw_p_values = test_results['P_Value'].astype(float)
    test_results['Adjusted_P_Value'] = sm.stats.multipletests(raw_p_values, method='bonferroni')[1]
    test_results['Is_Significant'] = test_results['Adjusted_P_Value'] < 0.05

    # Merge normality and test results
    stat_results = normality_results.merge(test_results, left_index=True, right_index=True)
    return stat_results
def comparison_plot(df, group_column, result_stat_test, y_label='Y Axis', name_fig='file.jpg'):
    """
    Creates a group comparison plot based on statistical test results.

    Draws one pair of bars (group medians) per entry of the 'Index' column,
    makes the tick label bold for entries flagged in ``result_stat_test``,
    adds Mann-Whitney significance stars for each pair, saves the figure and
    then shows it.

    Parameters:
    df (pandas.DataFrame): DataFrame containing the data for plotting. Must
        contain the columns 'Index', 'value' and ``group_column``.
    group_column (str): Column name in df that contains group names.
    result_stat_test (pandas.DataFrame): DataFrame containing the results of statistical tests.
        It is looked up by 'Index' entry and must provide an
        'Is_Significant' column.
    y_label (str, optional): Label for the Y-axis. Defaults to 'Y Axis'.
    name_fig (str, optional): Filename for saving the figure. Defaults to 'file.jpg'.

    Returns:
    None: The function generates and saves a plot.

    Raises:
    ValueError: If ``group_column`` does not contain exactly two unique groups.
    """
    # Determine the unique groups
    unique_groups = df[group_column].unique()
    if len(unique_groups) != 2:
        raise ValueError("The DataFrame must contain exactly two unique groups for comparison.")
    hue_order = list(unique_groups)

    # A single explicit category order shared by the bars, the tick-label
    # styling and the annotations. Without it the bars follow the sorted
    # order produced by groupby while the other two follow the order of
    # appearance in df, so bold labels and stars could land on the wrong
    # categories.
    x_order = list(df['Index'].unique())

    # Calculate median values for plotting
    median_values_df = df.groupby([group_column, 'Index'])['value'].median().reset_index()

    sns.set_style("white")
    fig, ax = plt.subplots(figsize=(20, 8), dpi=100)
    sns.barplot(x='Index', y='value', hue=group_column, data=median_values_df,
                order=x_order, hue_order=hue_order,
                palette={hue_order[0]: 'gray', hue_order[1]: 'blue'}, ax=ax)

    # Annotate significant results: bold tick label for every significant entry
    for tick_label, category in zip(ax.get_xticklabels(), x_order):
        if result_stat_test.loc[category, 'Is_Significant']:
            tick_label.set_weight('bold')

    # Annotation configuration: compare the two groups within every category
    pairs = [((category, hue_order[0]), (category, hue_order[1])) for category in x_order]
    annotator = Annotator(ax, pairs, data=df, x='Index', y='value', hue=group_column,
                          order=x_order, hue_order=hue_order)

    # Star thresholds are divided by the number of comparisons
    # (Bonferroni-style) instead of correcting the p-values themselves.
    num_comparisons = len(x_order)
    pvalue_thresholds = [(1.0e-03 / num_comparisons, '***'), (1.00e-02 / num_comparisons, '**'),
                         (5.00e-02 / num_comparisons, '*'), (1, 'ns')]
    annotator.configure(test='Mann-Whitney', text_format='star', loc='inside', pvalue_thresholds=pvalue_thresholds)
    annotator.apply_test().annotate()

    # Customize plot appearance
    ax.set_xlabel('')
    ax.set_ylabel(y_label, fontweight='bold', fontsize=28, labelpad=30)
    leg = ax.legend(fontsize='22', loc='lower left', frameon=False)
    for text in leg.get_texts():
        text.set_fontweight('bold')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right", fontsize=24)
    ax.tick_params(axis='both', which='major', labelsize=26, labelcolor='black', width=2.5)
    for label in ax.get_yticklabels():
        label.set_fontweight('bold')
    sns.despine(top=True, right=True)
    for spine in ['bottom', 'left']:
        ax.spines[spine].set_linewidth(2.5)
        ax.spines[spine].set_color('black')
    plt.tight_layout()

    # Save before showing so the file is written from the finished figure
    # regardless of what the backend does to it once the window is closed.
    fig.savefig(name_fig, bbox_inches='tight', dpi=300)
    plt.show()
# Example usage:
# df_melted = melted(df, 'Average', node_labels)  # node_labels: list of column names, one per value in each 'Average' vector
# result_stat_test = stat_tests(df_melted)
# comparison_plot(df_melted, 'Group', result_stat_test, 'Y Axis Label', 'output_figure.jpg')