Skip to content

Commit

Permalink
#15 Add the option to parametrize the desired features
Browse files Browse the repository at this point in the history
  • Loading branch information
poupou-web3 committed Aug 2, 2023
1 parent 7e40e04 commit 4f0af92
Showing 1 changed file with 72 additions and 31 deletions.
103 changes: 72 additions & 31 deletions sbscorer/sblegos/TransactionAnalyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,36 +633,77 @@ def get_df_seeder_count(self):
ascending=False).reset_index().drop(
columns=['to_address']).rename(columns={'from_address': 'seeder', 'EOA': 'count_seed'})

# NOTE(review): this span appears to be the pre-change ("deleted") side of the diff hunk;
# the parametrized get_df_features(self, list_features=None) further down supersedes it.
# No return statement is visible inside this span - presumably the trailing `return merge`
# line of the hunk was shared by both versions; confirm against the repository history.
def get_df_features(self):
# Start from the number of 'tx_hash' entries per group of self.gb_EOA_sorted, exposed as 'count_tx'
# (presumably one row per EOA - the later code relies on an 'EOA' column being present).
df_features = self.gb_EOA_sorted['tx_hash'].count().reset_index().rename(columns={'tx_hash': 'count_tx'})
# Flag addresses with at most 10 transactions; only those get the LCS statistics below.
df_features['less_10_tx'] = df_features['count_tx'] <= 10
# Per-address counts from the two seed-counting helpers, then boolean "count > 0" flags.
df_features['count_same_seed'] = df_features['EOA'].apply(lambda x: self.count_same_seed(x))
df_features['count_same_seed_naive'] = df_features['EOA'].apply(lambda x: self.count_same_seed_naive(x))
df_features['same_seed'] = df_features['count_same_seed'] > 0
df_features['same_seed_naive'] = df_features['count_same_seed_naive'] > 0
# True where the two seed flags disagree.
df_features['seed_suspicious'] = df_features.loc[:, 'same_seed'].ne(df_features.loc[:, 'same_seed_naive'])
df_features['count_interact_other_ctbt'] = df_features['EOA'].apply(
lambda x: self.count_interaction_with_other_contributor(x))

# Frames merged on 'EOA' at the end of the function.
details_first_incoming_transaction = self.details_first_incoming_transaction
details_first_outgoing_transaction = self.details_first_outgoing_transaction

# LCS columns default to 0; note that 'lcs' itself is never updated after this line.
df_features['lcs'] = 0
df_features['cluster_size_lcs'] = 0
df_features['mean_score_lcs'] = 0
df_features['max_score_lcs'] = 0
df_bool_less_10_tx = df_features['less_10_tx']

# Only run the similarity search when at least one address has <= 10 transactions.
if df_bool_less_10_tx.sum() > 0:
# r holds, per selected address, whatever transaction_similitude_pylcs returns
# (something with a len(); its exact type is not visible here).
r = df_features.loc[df_bool_less_10_tx, 'EOA'].apply(
lambda x: self.transaction_similitude_pylcs(x, minimum_sim_tx=3))
df_features.loc[df_bool_less_10_tx, 'cluster_size_lcs'] = r.apply(lambda x: len(x))
df_features.loc[df_bool_less_10_tx, 'mean_score_lcs'] = r.apply(lambda x: self.get_mean_score_lcs(x))
df_features.loc[df_bool_less_10_tx, 'max_score_lcs'] = r.apply(lambda x: self.get_max_score_lcs(x))

# An address "has LCS" when its similarity result was non-empty.
df_features['has_lcs'] = df_features['cluster_size_lcs'] > 0

# Left joins keep every row of df_features even without a matching first transaction.
merge = df_features.merge(details_first_incoming_transaction, on='EOA', how='left')
merge = merge.merge(details_first_outgoing_transaction, on='EOA', how='left')
def get_df_features(self, list_features=None):
    """
    Get the features of the transaction dataset.

    Parameters
    ----------
    list_features : list of str or str, optional
        The features to retrieve. If None, the default features are
        retrieved: ['count_tx', 'less_10_tx', 'count_same_seed',
        'count_same_seed_naive', 'same_seed', 'same_seed_naive',
        'seed_suspicious', 'count_interact_other_ctbt',
        'details_first_incoming_transaction',
        'details_first_outgoing_transaction'].
        If 'all' is passed (alone or inside the list), the default features
        plus the 'lcs' feature are retrieved. A single feature name may be
        passed as a plain string.
        Requesting a derived feature also computes, and returns, the
        features it is derived from (e.g. 'same_seed' brings in
        'count_same_seed', 'lcs' brings in 'less_10_tx' and 'count_tx').
        Names that are not known features are ignored.

    Returns
    -------
    pd.DataFrame
        One row per address in the 'EOA' column, followed by one or more
        columns per requested feature. When no feature needing a merge is
        requested, the feature frame is returned as is.
    """
    default_features = ['count_tx', 'less_10_tx', 'count_same_seed', 'count_same_seed_naive',
                        'same_seed', 'same_seed_naive', 'seed_suspicious', 'count_interact_other_ctbt',
                        'details_first_incoming_transaction', 'details_first_outgoing_transaction']
    # Columns each derived feature reads; without them the derivation raises KeyError.
    prerequisites = {
        'less_10_tx': ('count_tx',),
        'same_seed': ('count_same_seed',),
        'same_seed_naive': ('count_same_seed_naive',),
        'seed_suspicious': ('same_seed', 'same_seed_naive'),
        'lcs': ('less_10_tx',),
    }

    if list_features is None:
        requested = set(default_features)
    else:
        # A bare string would otherwise be searched by substring ('lcs' in 'xlcsx').
        if isinstance(list_features, str):
            list_features = [list_features]
        requested = set(list_features)
        if 'all' in requested:
            requested.discard('all')
            requested.update(default_features)
            requested.add('lcs')

    # Pull in prerequisites transitively so every derived feature finds its inputs.
    pending = list(requested)
    while pending:
        for dependency in prerequisites.get(pending.pop(), ()):
            if dependency not in requested:
                requested.add(dependency)
                pending.append(dependency)

    if 'count_tx' in requested:
        df_features = self.gb_EOA_sorted['tx_hash'].count().reset_index().rename(columns={'tx_hash': 'count_tx'})
    else:
        df_features = pd.DataFrame(self.df_transactions['EOA'].unique(), columns=['EOA'])

    if 'less_10_tx' in requested:
        df_features['less_10_tx'] = df_features['count_tx'] <= 10
    if 'count_same_seed' in requested:
        df_features['count_same_seed'] = df_features['EOA'].apply(self.count_same_seed)
    if 'count_same_seed_naive' in requested:
        df_features['count_same_seed_naive'] = df_features['EOA'].apply(self.count_same_seed_naive)
    if 'same_seed' in requested:
        df_features['same_seed'] = df_features['count_same_seed'] > 0
    if 'same_seed_naive' in requested:
        df_features['same_seed_naive'] = df_features['count_same_seed_naive'] > 0
    if 'seed_suspicious' in requested:
        # Suspicious when the two seed detections disagree.
        df_features['seed_suspicious'] = df_features['same_seed'].ne(df_features['same_seed_naive'])
    if 'count_interact_other_ctbt' in requested:
        df_features['count_interact_other_ctbt'] = df_features['EOA'].apply(
            self.count_interaction_with_other_contributor)

    if 'lcs' in requested:
        # NOTE(review): the 'lcs' column is only ever set to 0; it is kept so the
        # output columns stay the same for existing callers.
        df_features['lcs'] = 0
        df_features['cluster_size_lcs'] = 0
        df_features['mean_score_lcs'] = 0
        df_features['max_score_lcs'] = 0
        is_small = df_features['less_10_tx']

        # The similarity search is only run for addresses with at most 10 transactions.
        if is_small.any():
            clusters = df_features.loc[is_small, 'EOA'].apply(
                lambda eoa: self.transaction_similitude_pylcs(eoa, minimum_sim_tx=3))
            lcs_scorers = {
                'cluster_size_lcs': len,
                'mean_score_lcs': self.get_mean_score_lcs,
                'max_score_lcs': self.get_max_score_lcs,
            }
            for column, scorer in lcs_scorers.items():
                # Rebuild the whole column (0 for addresses that were not analysed)
                # instead of writing scores into an integer column through .loc,
                # which relies on implicit upcasting deprecated by pandas (PDEP-6).
                df_features[column] = clusters.apply(scorer).reindex(df_features.index, fill_value=0)

        df_features['has_lcs'] = df_features['cluster_size_lcs'] > 0

    # Start from the feature frame so the result is defined whichever merges are requested.
    merge = df_features
    if 'details_first_incoming_transaction' in requested:
        merge = merge.merge(self.details_first_incoming_transaction, on='EOA', how='left')
    if 'details_first_outgoing_transaction' in requested:
        merge = merge.merge(self.details_first_outgoing_transaction, on='EOA', how='left')

    return merge

0 comments on commit 4f0af92

Please sign in to comment.