Skip to content

Commit

Permalink
FEA: Add FM&DeepFM model
Browse files Browse the repository at this point in the history
  • Loading branch information
ShanleiMu committed Jul 9, 2020
1 parent f8727ac commit 6576b2b
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 2 deletions.
69 changes: 69 additions & 0 deletions model/ctr_recommender/deepfm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
# @Time : 2020/7/8 10:33
# @Author : Shanlei Mu
# @Email : slmu@ruc.edu.cn
# @File : deepfm.py

"""
Reference:
Huifeng Guo et al., "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction." in IJCAI 2017.
"""

import torch
import torch.nn as nn
import numpy as np

from model.abstract_recommender import AbstractRecommender
from model.layers import FMEmbedding, FMFirstOrderLinear, BaseFactorizationMachine, MLPLayers


class DeepFM(AbstractRecommender):
    """DeepFM click-through-rate model.

    The prediction is the sigmoid of the sum of two components that share a
    single embedding table: a factorization machine (first-order linear term
    plus second-order pairwise interactions) and an MLP over the flattened
    embeddings.
    """

    def __init__(self, config, dataset):
        """Build all sub-modules.

        Args:
            config: mapping read for ``'model.embedding_size'``,
                ``'model.layers'`` and ``'model.dropout'``.
            dataset: object exposing ``token2id`` (field name -> sized
                collection of tokens) and ``token2seqlen`` (field name ->
                number of input columns that field occupies).
        """
        # The two-argument form is required: ``super(DeepFM).__init__()`` only
        # re-initialises an unbound super object and never runs the parent
        # constructor.
        super(DeepFM, self).__init__()

        self.embedding_size = config['model.embedding_size']
        self.dropout = config['model.dropout']
        self.field_names = list(dataset.token2id.keys())
        self.field_dims = [len(dataset.token2id[name]) for name in self.field_names]
        self.field_seqlen = [dataset.token2seqlen[name] for name in self.field_names]
        self.offsets = self._build_offsets()
        # forward() flattens one embedding per input column, so the first MLP
        # layer must be sized by the total column count, not the field count.
        mlp_input_size = self.embedding_size * sum(self.field_seqlen)
        self.layers = [mlp_input_size] + list(config['model.layers'])

        self.first_order_linear = FMFirstOrderLinear(self.field_dims, self.offsets)
        self.embedding = FMEmbedding(self.field_dims, self.offsets, self.embedding_size)
        self.fm = BaseFactorizationMachine(reduce_sum=True)
        self.mlp_layers = MLPLayers(self.layers, self.dropout)
        self.deep_predict_layer = nn.Linear(self.layers[-1], 1)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()

    def _build_offsets(self):
        """Return the per-column row offset into the shared embedding table.

        Each field owns a contiguous range of rows that starts at the summed
        size of all preceding fields. A field spanning several input columns
        repeats its own start offset for every one of those columns.

        Returns:
            1-D ``int64`` numpy array with ``sum(self.field_seqlen)`` entries.
        """
        if not self.field_dims:
            return np.zeros(0, dtype=np.int64)
        field_starts = np.concatenate(([0], np.cumsum(self.field_dims)[:-1]))
        # np.int64 instead of np.long: the latter does not exist on
        # NumPy 1.24-1.26 and is platform dependent elsewhere.
        return np.repeat(field_starts, self.field_seqlen).astype(np.int64)

    def forward(self, interaction):
        """Compute the click probability for a batch.

        Args:
            interaction: mapping indexed by field name; the per-field tensors
                are concatenated along ``dim=1``.

        Returns:
            Sigmoid of the summed FM and deep outputs.
        """
        columns = [interaction[field] for field in self.field_names]
        x = torch.cat(columns, dim=1)
        embed_x = self.embedding(x)
        y_fm = self.first_order_linear(x) + self.fm(embed_x)
        # TODO: decide how to deal with multi-hot features (the original paper
        # explicitly requires every field to be a one-hot feature).
        flat_embed = embed_x.view(-1, sum(self.field_seqlen) * self.embedding_size)
        y_deep = self.deep_predict_layer(self.mlp_layers(flat_embed))
        return self.sigmoid(y_fm + y_deep)

    def train_model(self, interaction):
        """Return the binary cross-entropy loss for a batch."""
        # NOTE(review): LABEL is neither defined nor imported in the visible
        # part of this module -- confirm where it is supposed to come from.
        label = interaction[LABEL]
        output = self.forward(interaction)
        return self.loss(output, label)

    def predict(self, interaction):
        """Return the predicted probabilities for a batch."""
        return self.forward(interaction)
59 changes: 59 additions & 0 deletions model/ctr_recommender/fm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# @Time : 2020/7/8 10:09
# @Author : Shanlei Mu
# @Email : slmu@ruc.edu.cn
# @File : fm.py

"""
Reference:
Steffen Rendle et al., "Factorization Machines." in ICDM 2010.
"""

import torch
import torch.nn as nn
import numpy as np

from model.abstract_recommender import AbstractRecommender
from model.layers import FMEmbedding, FMFirstOrderLinear, BaseFactorizationMachine


class FM(AbstractRecommender):
    """Factorization machine click-through-rate model.

    The prediction is the sigmoid of a first-order linear term plus the
    second-order pairwise interaction term computed from field embeddings.
    """

    def __init__(self, config, dataset):
        """Build all sub-modules.

        Args:
            config: mapping read for ``'model.embedding_size'``.
            dataset: object exposing ``token2id`` (field name -> sized
                collection of tokens) and ``token2seqlen`` (field name ->
                number of input columns that field occupies).
        """
        # The two-argument form is required: ``super(FM).__init__()`` only
        # re-initialises an unbound super object and never runs the parent
        # constructor.
        super(FM, self).__init__()

        self.embedding_size = config['model.embedding_size']
        self.field_names = list(dataset.token2id.keys())
        self.field_dims = [len(dataset.token2id[name]) for name in self.field_names]
        self.field_seqlen = [dataset.token2seqlen[name] for name in self.field_names]
        self.offsets = self._build_offsets()

        self.embedding = FMEmbedding(self.field_dims, self.offsets, self.embedding_size)
        self.first_order_linear = FMFirstOrderLinear(self.field_dims, self.offsets)
        self.fm = BaseFactorizationMachine(reduce_sum=True)
        self.sigmoid = nn.Sigmoid()
        self.loss = nn.BCELoss()

    def _build_offsets(self):
        """Return the per-column row offset into the shared embedding table.

        Each field owns a contiguous range of rows that starts at the summed
        size of all preceding fields. A field spanning several input columns
        repeats its own start offset for every one of those columns.

        Returns:
            1-D ``int64`` numpy array with ``sum(self.field_seqlen)`` entries.
        """
        if not self.field_dims:
            return np.zeros(0, dtype=np.int64)
        field_starts = np.concatenate(([0], np.cumsum(self.field_dims)[:-1]))
        # np.int64 instead of np.long: the latter does not exist on
        # NumPy 1.24-1.26 and is platform dependent elsewhere.
        return np.repeat(field_starts, self.field_seqlen).astype(np.int64)

    def forward(self, interaction):
        """Compute the click probability for a batch.

        Args:
            interaction: mapping indexed by field name; the per-field tensors
                are concatenated along ``dim=1``.

        Returns:
            Sigmoid of the summed first-order and second-order FM terms.
        """
        columns = [interaction[field] for field in self.field_names]
        x = torch.cat(columns, dim=1)
        first_order = self.first_order_linear(x)
        second_order = self.fm(self.embedding(x))
        return self.sigmoid(first_order + second_order)

    def train_model(self, interaction):
        """Return the binary cross-entropy loss for a batch."""
        # NOTE(review): LABEL is neither defined nor imported in the visible
        # part of this module -- confirm where it is supposed to come from.
        label = interaction[LABEL]
        output = self.forward(interaction)
        return self.loss(output, label)

    def predict(self, interaction):
        """Return the predicted probabilities for a batch."""
        return self.forward(interaction)
File renamed without changes.
5 changes: 3 additions & 2 deletions model/general_recommender/neumf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def __init__(self, config, dataset):
self.item_mlp_embedding = nn.Embedding(self.n_items, self.layers[0] - self.layers[0] // 2)
self.mlp_layers = MLPLayers(self.layers, self.dropout)
self.predict_layer = nn.Linear(self.embedding_size + self.layers[-1], 1)
self.loss = nn.BCEWithLogitsLoss()
self.sigmoid = nn.Sigmoid()
self.loss = nn.BCELoss()

self._init_weights()

Expand All @@ -57,7 +58,7 @@ def forward(self, user, item):
mf_output = torch.mul(user_mf_e, item_mf_e)
mlp_output = self.mlp_layers(torch.cat((user_mlp_e, item_mlp_e), -1))

output = self.predict_layer(torch.cat((mf_output, mlp_output), -1))
output = self.sigmoid(self.predict_layer(torch.cat((mf_output, mlp_output), -1)))
return output

def train_model(self, interaction):
Expand Down
55 changes: 55 additions & 0 deletions model/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import warnings
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as fn
Expand Down Expand Up @@ -78,3 +79,57 @@ def _init_weights(self):

def forward(self, input_feature):
return self.mlp_layers(input_feature)


class FMEmbedding(nn.Module):
    """Single embedding table shared by every field.

    All fields are stored in one ``nn.Embedding`` with ``sum(field_dims)``
    rows; ``offsets`` shifts each input column's field-local ids into that
    field's row range.
    """

    def __init__(self, field_dims, offsets, embed_dim):
        """Create the table.

        Args:
            field_dims: per-field id counts; their sum is the table height.
            offsets: array-like added to every input row in ``forward``, one
                entry per input column.
            embed_dim: width of each embedding vector.
        """
        # Two-argument super: ``super(FMEmbedding).__init__()`` never runs
        # nn.Module.__init__, so assigning a sub-module below would raise.
        super(FMEmbedding, self).__init__()
        self.embedding = nn.Embedding(sum(field_dims), embed_dim)
        self.offsets = offsets

        self._init_weights()

    def _init_weights(self):
        """Initialise the embedding weights with Xavier normal."""
        nn.init.xavier_normal_(self.embedding.weight)

    def forward(self, input_x):
        """Look up embeddings for field-local ids.

        Args:
            input_x: integer tensor whose last dimension matches
                ``len(offsets)``.

        Returns:
            Tensor of embeddings with one extra trailing dimension of size
            ``embed_dim``.
        """
        # new_tensor keeps the offsets on input_x's device and dtype.
        shifted = input_x + input_x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(shifted)


class FMFirstOrderLinear(nn.Module):
    """First-order (linear) term of a factorization machine.

    Every id in the shared index space has a learned weight vector of size
    ``output_dim``; the output is the sum of the weights of the ids present
    in a row, plus a bias.
    """

    def __init__(self, field_dims, offsets, output_dim=1):
        """Create the weight table and bias.

        Args:
            field_dims: per-field id counts; their sum is the table height.
            offsets: array-like added to every input row in ``forward``, one
                entry per input column.
            output_dim: size of the output vector per sample.
        """
        # Two-argument super: ``super(FMFirstOrderLinear).__init__()`` never
        # runs nn.Module.__init__, so assigning a sub-module below would raise.
        super(FMFirstOrderLinear, self).__init__()

        self.w = nn.Embedding(sum(field_dims), output_dim)
        self.bias = nn.Parameter(torch.zeros((output_dim, )))
        self.offsets = offsets

        self._init_weights()

    def _init_weights(self):
        """Initialise the per-id weights with Xavier normal."""
        nn.init.xavier_normal_(self.w.weight)

    def forward(self, input_x):
        """Sum the per-id weights along ``dim=1`` and add the bias.

        Args:
            input_x: integer tensor whose last dimension matches
                ``len(offsets)``.

        Returns:
            Tensor with ``dim=1`` of the looked-up weights summed out.
        """
        # new_tensor keeps the offsets on input_x's device and dtype.
        shifted = input_x + input_x.new_tensor(self.offsets).unsqueeze(0)
        return torch.sum(self.w(shifted), dim=1) + self.bias


class BaseFactorizationMachine(nn.Module):
    """Second-order pairwise interaction term of a factorization machine.

    Uses the identity ``sum_{i<j} v_i * v_j = 0.5 * ((sum v)^2 - sum v^2)``,
    reducing over ``dim=1`` of the input.
    """

    def __init__(self, reduce_sum=True):
        """Args:
            reduce_sum: when true, additionally sum the result over ``dim=1``
                (kept as a size-1 dimension).
        """
        # Two-argument super: ``super(BaseFactorizationMachine).__init__()``
        # never runs nn.Module.__init__, so calling the module would fail.
        super(BaseFactorizationMachine, self).__init__()
        self.reduce_sum = reduce_sum

    def forward(self, input_x):
        """Compute the pairwise interaction term.

        Args:
            input_x: tensor of embeddings; ``dim=1`` is reduced.

        Returns:
            Half of (square-of-sum minus sum-of-squares), summed over the
            remaining ``dim=1`` with ``keepdim=True`` when ``reduce_sum`` is
            set.
        """
        square_of_sum = torch.sum(input_x, dim=1) ** 2
        sum_of_square = torch.sum(input_x ** 2, dim=1)
        interaction = square_of_sum - sum_of_square
        if self.reduce_sum:
            interaction = torch.sum(interaction, dim=1, keepdim=True)
        return 0.5 * interaction

0 comments on commit 6576b2b

Please sign in to comment.