Skip to content

Commit

Permalink
fix LP test and model
Browse files Browse the repository at this point in the history
  • Loading branch information
anpolol committed Dec 18, 2023
1 parent 8bcdd70 commit 495768c
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 41 deletions.
59 changes: 31 additions & 28 deletions stable_gnn/model_link_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,24 @@
from typing import List

import torch
import torch_geometric.transforms as T
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score
from sklearn.base import BaseEstimator
from torch_geometric.datasets import Planetoid
from sklearn.metrics import f1_score
from sklearn.neural_network import MLPClassifier

from stable_gnn.embedding import EmbeddingFactory
from stable_gnn.embedding.sampling.samplers import NegativeSampler
from stable_gnn.graph import Graph


class ModelLinkPrediction():
class ModelLinkPrediction:
"""
Model for Link Prediction task with unsupervised embeddings
:param dataset: (Graph): Input Graph
:param number_of_trials: (int): Number of optuna trials used to tune the embeddings
:param device: (device): Device 'cuda' or 'cpu'
:param emb_conv_name: (str): Name of convolution for embedding learning
:param loss_name: (str): Name of loss function for embedding learning
:param loss_name: (str): Name of loss function for embedding learning
"""

def __init__(
Expand All @@ -33,21 +31,18 @@ def __init__(
) -> None:
super().__init__()


self.number_of_trials=number_of_trials
self.loss_name=loss_name
self.emb_conv_name=emb_conv_name
self.number_of_trials = number_of_trials
self.loss_name = loss_name
self.emb_conv_name = emb_conv_name
self.device = device



def train_test_edges(self, dataset: Graph) -> (List[List[int]], List[List[int]],List[List[int]],List[List[int]]):
'''
def train_test_edges(self, dataset: Graph) -> (List[List[int]], List[List[int]], List[List[int]], List[List[int]]):
"""
Split dataset to train and test and calculate negative samples
:param dataset: (Graph): Data to split on train, test and negatives
:return: (Tuple): Tuple of four lists: train edges, negative train samples, test edges and negative test samples
'''
"""
self.data = dataset[0]
self.data.edge_index = self.data.edge_index.type(torch.LongTensor)

Expand All @@ -61,7 +56,7 @@ def train_test_edges(self, dataset: Graph) -> (List[List[int]], List[List[int]],
else:
test_edges.append(edge)

neg_samples_train =self._neg_samples(train_edges, self.data)
neg_samples_train = self._neg_samples(train_edges, self.data)
neg_samples_test = self._neg_samples(test_edges, self.data)
self.data.edge_index = torch.LongTensor(train_edges).T
return train_edges, neg_samples_train, test_edges, neg_samples_test
Expand All @@ -76,20 +71,29 @@ def _neg_samples(self, positive_edges: List[int], data: Graph) -> List[int]:
return neg_edges

def train_cl(self, train_edges: List[List[int]], neg_samples_train: List[List[int]]) -> BaseEstimator:
'''
"""
Train classifier for link prediction
:param train_edges: (List): List of existing edges
:param neg_samples_train: (List): List of negative samples to train
:return: (BaseEstimator): Fitted classifier which supports the fit/predict interface
'''
self.embeddings = EmbeddingFactory().build_embeddings(
loss_name=self.loss_name, conv=self.emb_conv_name, data=[self.data], device=self.device, number_of_trials=self.number_of_trials,
tune_out=True
)
"""
if self.number_of_trials:
self.embeddings = EmbeddingFactory().build_embeddings(
loss_name=self.loss_name,
conv=self.emb_conv_name,
data=[self.data],
device=self.device,
number_of_trials=self.number_of_trials,
tune_out=True,
)
else:
self.embeddings = self.data.x

emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
self.clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=5, random_state=0)
self.clf = MLPClassifier(
max_iter=100
) # GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=5, random_state=0)
x_pred = []
for edge in train_edges:
x_pred.append(torch.concat((emb_norm[edge[0]], emb_norm[edge[1]])).tolist())
Expand All @@ -100,15 +104,15 @@ def train_cl(self, train_edges: List[List[int]], neg_samples_train: List[List[in
self.clf.fit(x_pred, true_train)
return self.clf

def test(self, clf: BaseEstimator, test_edges: List[List[int]], neg_samples_test: List[List[int]] ) -> float:
'''
def test(self, clf: BaseEstimator, test_edges: List[List[int]], neg_samples_test: List[List[int]]) -> float:
"""
Calculate f1 measure for test edges
:param clf: (BaseEstimator): Classifier used to predict links for the test edges
:param test_edges: (List): List of existing edges to test on
:param neg_samples_test: (List): List of negative samples to test on
:return: (float): Value of f1 measure
'''
"""
emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
pred_test = []
for edge in test_edges:
Expand All @@ -119,4 +123,3 @@ def test(self, clf: BaseEstimator, test_edges: List[List[int]], neg_samples_test
y_pred = clf.predict(pred_test)
y_true = [1] * len(test_edges) + [0] * len(neg_samples_test)
return f1_score(y_true, y_pred)

35 changes: 22 additions & 13 deletions tests/test_general/test_link_prediction.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
import pytest
import torch_geometric.transforms as T
from stable_gnn.model_link_predict import ModelLinkPrediction
from torch_geometric.datasets import Planetoid
from sklearn.ensemble import GradientBoostingClassifier
import pytest
from sklearn.neural_network import MLPClassifier
from torch_geometric.datasets import Planetoid

from stable_gnn.model_link_predict import ModelLinkPrediction


@pytest.mark.parametrize("conv", ["SAGE", "GAT", "GCN"])
@pytest.mark.parametrize("loss_name", ["APP", "LINE", "HOPE_AA", "VERSE_Adj"])
def test_linkpredict(loss_name: str, conv: str) -> None:
root = '../tmp/'
name = 'Cora'
# @pytest.mark.parametrize("conv", ["SAGE", "GAT", "GCN"])
# @pytest.mark.parametrize("loss_name", ["APP", "LINE", "HOPE_AA", "VERSE_Adj"])
def test_linkpredict(): # loss_name: str, conv: str) -> None:
root = "../tmp/"
name = "Cora"
dataset = Planetoid(root=root + str(name), name=name, transform=T.NormalizeFeatures())

model = ModelLinkPrediction(number_of_trials=50, loss_name=loss_name, emb_conv_name=conv)
model_before = ModelLinkPrediction(number_of_trials=0) # , loss_name=loss_name, emb_conv_name=conv)
model_after = ModelLinkPrediction(number_of_trials=10) # , loss_name=loss_name, emb_conv_name=conv)

train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)
train_edges_b, train_negative_b, test_edges_b, test_negative_b = model_before.train_test_edges(dataset)
train_edges, train_negative, test_edges, test_negative = model_after.train_test_edges(dataset)

cl_before = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=5, random_state=0)
cl_after = model.train_cl(train_edges, train_negative)
assert (model.test(cl_before, test_edges, test_negative)) < (model.test(cl_after, test_edges, test_negative))
cl_before = model_before.train_cl(
train_edges_b, train_negative_b
) # MLPClassifier()#GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, max_depth=5, random_state=0)
cl_after = model_after.train_cl(train_edges, train_negative)
assert (model_before.test(cl_before, test_edges_b, test_negative_b)) < (
model_after.test(cl_after, test_edges, test_negative)
)
36 changes: 36 additions & 0 deletions tutorials/example_graph_classification.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

0 comments on commit 495768c

Please sign in to comment.