Merge pull request #624 from wangzhen38/bert_master

add bert4rec
PaddlePaddle · Dec 4, 2021 · 808d671 · 808d671
2 parents 885f102 + 353c5f7
commit 808d671
Show file tree

Hide file tree

Showing 9 changed files with 355,518 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -124,6 +124,7 @@ python -u tools/static_trainer.py -m models/rank/dnn/config.yaml #  静态图训
   |   排序   |      [Logistic Regression](models/rank/logistic_regression/)      |    ✓    |    ✓    |     ✓     |     x     | >=2.1.0 | /                                                                                                                                                                                                           |
   |   排序   |                      [Dnn](models/rank/dnn/)                      |    ✓    |    ✓    |     ✓     |     ✓     | >=2.1.0  | /                                                                                                                                                                                                           |
   |   排序   |                       [FM](models/rank/fm/)                       |    ✓    |    ✓    |     ✓     |     x     | >=2.1.0 | [IEEE Data Mining 2010][Factorization machines](https://analyticsconsultores.com.mx/wp-content/uploads/2019/03/Factorization-Machines-Steffen-Rendle-Osaka-University-2010.pdf)                             |
+  |   排序   |                       [BERT4Rec](models/rank/bert4rec/)                       |    ✓    |    ✓    |     ✓     |     x     | >=2.1.0 | [CIKM 2019][BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer](https://arxiv.org/pdf/1904.06690.pdf)                             |
   |   排序   |                      [FFM](models/rank/ffm/)                      |    ✓    |    ✓    |     ✓     |     x     | >=2.1.0 | [RECSYS 2016][Field-aware Factorization Machines for CTR Prediction](https://dl.acm.org/doi/pdf/10.1145/2959100.2959134)                                                                                    |
   |   排序   |                      [FNN](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/fnn/)                      |    ✓    |    ✓    |     ✓     |     x     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [ECIR 2016][Deep Learning over Multi-field Categorical Data](https://arxiv.org/pdf/1601.02376.pdf)                                                                                                          |
   |   排序   |            [Deep Crossing](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/deep_crossing/)            |    ✓    |    ✓    |     ✓     |     x     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [ACM 2016][Deep Crossing: Web-Scale Modeling without Manually Crafted Combinatorial Features](https://www.kdd.org/kdd2016/papers/files/adf0975-shanA.pdf)                                                   |

diff --git a/README_EN.md b/README_EN.md
@@ -114,6 +114,7 @@ python -u tools/static_trainer.py -m models/rank/dnn/config.yaml #  Training wit
   |         Rank          |      [Logistic Regression](models/rank/logistic_regression/)      |   ✓   |    ✓    |        ✓         |     x     |      >=2.1.0     | /                                                                                                                                                                                                           |
   |         Rank          |                      [Dnn](models/rank/dnn/)                      |   ✓   |    ✓    |        ✓         |     ✓     |      >=2.1.0     | /                                                                                                                                                                                                           |
   |         Rank          |                       [FM](models/rank/fm/)                       |   ✓   |    ✓    |        ✓         |     x     |      >=2.1.0     | [IEEE Data Mining 2010][Factorization machines](https://analyticsconsultores.com.mx/wp-content/uploads/2019/03/Factorization-Machines-Steffen-Rendle-Osaka-University-2010.pdf)                             |
+  |         Rank          |                       [BERT4Rec](models/rank/bert4rec/)                       |   ✓   |    ✓    |        ✓         |     x     |      >=2.1.0     | [CIKM 2019][BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer](https://arxiv.org/pdf/1904.06690.pdf)                             |
   |         Rank          |                      [FFM](models/rank/ffm/)                      |   ✓   |    ✓    |        ✓         |     x     | >=2.1.0 | [RECSYS 2016][Field-aware Factorization Machines for CTR Prediction](https://dl.acm.org/doi/pdf/10.1145/2959100.2959134)                                                                                    |
   |         Rank          |                      [FNN](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/fnn/)                      |   ✓   |    ✓    |        ✓         |     x     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [ECIR 2016][Deep Learning over Multi-field Categorical Data](https://arxiv.org/pdf/1601.02376.pdf)                                                                                                          |
   |         Rank          |            [Deep Crossing](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5/models/rank/deep_crossing/)            |   ✓   |    ✓    |        ✓         |     x     | [1.8.5](https://github.com/PaddlePaddle/PaddleRec/tree/release/1.8.5) | [ACM 2016][Deep Crossing: Web-Scale Modeling without Manually Crafted Combinatorial Features](https://www.kdd.org/kdd2016/papers/files/adf0975-shanA.pdf)                                                   |

diff --git a/models/rank/bert4rec/config_bigdata.yaml b/models/rank/bert4rec/config_bigdata.yaml
@@ -0,0 +1,53 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# global settings
+
+runner:
+  train_data_dir: "data/train"
+  train_reader_path: "data_reader" # importlib format
+  use_gpu: True
+  train_batch_size: 1
+  data_batch_size: 256
+  epochs: 10
+  print_interval: 100
+
+  model_save_path: "output_model_bert4rec"
+  test_data_dir: "data/test"
+  infer_reader_path: "data_reader" # importlib format
+  infer_batch_size: 1
+  infer_load_path: "output_model_bert4rec"
+  infer_start_epoch: 9
+  infer_end_epoch: 10
+
+
+# hyper parameters of user-defined network
+hyper_parameters:
+  # optimizer config
+  optimizer:
+    learning_rate: 0.0001
+    weight_decay: 0.01
+
+  _emb_size: 64
+  _n_layer: 2
+  _n_head: 2
+  _voc_size: 54546
+  _max_position_seq_len: 50
+  _sent_types: 2
+  hidden_act: "gelu"
+  _dropout: 0.5
+  _attention_dropout: 0.2
+  _param_initializer: 0.02
+  num_test_user: 40226
+