recsys_rl.bib

% Encoding: UTF-8

% Contextual-bandit news recommendation paper (WWW '10), ACM DL export.
% Page range uses the ASCII double hyphen and the series tag uses an ASCII
% apostrophe so the entry also works with 8-bit BibTeX toolchains.
@inproceedings{bandits,
  author    = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E.},
  title     = {A Contextual-Bandit Approach to Personalized News Article Recommendation},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web},
  series    = {WWW '10},
  year      = {2010},
  pages     = {661--670},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Raleigh, North Carolina, USA},
  isbn      = {9781605587998},
  doi       = {10.1145/1772690.1772758},
  url       = {https://doi.org/10.1145/1772690.1772758},
  keywords  = {contextual bandit, recommender systems, exploration/exploitation dilemma, web service, personalization}
}
  

% Page-wise deep RL recommendation paper (RecSys '18).
% The series tag lives in its own field instead of being appended to the
% booktitle, which removes the dependency on the textcomp quote macro.
@inproceedings{page-wise,
  author    = {Zhao, Xiangyu and Xia, Long and Zhang, Liang and Ding, Zhuoye and Yin, Dawei and Tang, Jiliang},
  title     = {Deep Reinforcement Learning for Page-wise Recommendations},
  booktitle = {Proceedings of the 12th {ACM} Conference on Recommender Systems},
  series    = {RecSys '18},
  year      = {2018},
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/3240323.3240374},
  url       = {https://doi.org/10.1145/3240323.3240374}
}

% List-wise deep RL recommendation, arXiv preprint.
% Stored as a misc entry with eprint fields, like the other arXiv entries in
% this file, instead of faking a journal called ArXiv with the id as volume.
@misc{list-wise,
  author        = {Zhao, Xiangyu and Zhang, Liang and Ding, Zhuoye and Yin, Dawei and Zhao, Yihong and Tang, Jiliang},
  title         = {Deep Reinforcement Learning for List-wise Recommendations},
  year          = {2018},
  eprint        = {1801.00209},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1801.00209}
}

% Matrix factorization for recommender systems (IEEE Computer, Aug 2009).
% Page range uses the ASCII double hyphen rather than a Unicode en dash.
@article{CF_MF,
  author     = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
  title      = {Matrix Factorization Techniques for Recommender Systems},
  journal    = {Computer},
  volume     = {42},
  number     = {8},
  pages      = {30--37},
  numpages   = {8},
  month      = aug,
  year       = {2009},
  issue_date = {August 2009},
  publisher  = {IEEE Computer Society Press},
  address    = {Washington, DC, USA},
  issn       = {0018-9162},
  doi        = {10.1109/MC.2009.263},
  url        = {https://doi.org/10.1109/MC.2009.263},
  keywords   = {Computational intelligence, Matrix factorization, Netflix Prize}
}

% Google News collaborative filtering paper (WWW '07), ACM DL export.
% The product name in the title is brace-protected so sentence-casing styles
% keep its capitals; pages and series use ASCII punctuation.
@inproceedings{GoogleNewsCF,
  author    = {Das, Abhinandan S. and Datar, Mayur and Garg, Ashutosh and Rajaram, Shyam},
  title     = {{Google News} Personalization: Scalable Online Collaborative Filtering},
  booktitle = {Proceedings of the 16th International Conference on World Wide Web},
  series    = {WWW '07},
  year      = {2007},
  pages     = {271--280},
  numpages  = {10},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Banff, Alberta, Canada},
  isbn      = {9781595936547},
  doi       = {10.1145/1242572.1242610},
  url       = {https://doi.org/10.1145/1242572.1242610},
  keywords  = {PLSI, mapreduce, minhash, personalization, Google news, online recommendation system, scalable collaborative filtering}
}

% Content-based research paper recommendation (IJACSA, vol. 5, 2014).
% The empty pages field was removed and the month uses the standard macro.
% NOTE(review): number = 10 is inferred from the month and the DOI suffix
% (051006); confirm against the publisher page and add the page range.
@article{content-based,
  author  = {Philip, Simon and Shola, Peter and Abari, Ovye},
  title   = {Application of Content-Based Approach in Research Paper Recommendation System for a Digital Library},
  journal = {International Journal of Advanced Computer Science and Applications},
  volume  = {5},
  number  = {10},
  month   = oct,
  year    = {2014},
  doi     = {10.14569/IJACSA.2014.051006}
}

% Content-based news recommendation, chapter in the Springer volume
% E-Commerce and Web Technologies (2010).
@inproceedings{content-based_news,
  author    = {Kompan, Michal and Bielikov{\'a}, M{\'a}ria},
  editor    = {Buccafurri, Francesco and Semeraro, Giovanni},
  title     = {Content-Based News Recommendation},
  booktitle = {E-Commerce and Web Technologies},
  year      = {2010},
  pages     = {61--72},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-642-15208-5}
}

% MDP-based recommender system paper (UAI'02), ACM DL export.
% The acronym in the title is brace-protected against sentence-casing styles;
% pages and series use ASCII punctuation.
@inproceedings{mdp,
  author    = {Shani, Guy and Brafman, Ronen I. and Heckerman, David},
  title     = {An {MDP}-Based Recommender System},
  booktitle = {Proceedings of the Eighteenth Conference on Uncertainty in Artificial Intelligence},
  series    = {UAI'02},
  year      = {2002},
  pages     = {453--460},
  numpages  = {8},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {Alberta, Canada},
  isbn      = {1558608974}
}

% Proximal Policy Optimization, arXiv preprint 1707.06347 (cs.LG).
@misc{ppo,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title         = {Proximal Policy Optimization Algorithms},
  year          = {2017},
  eprint        = {1707.06347},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}

% Truly Proximal Policy Optimization, arXiv preprint 1903.07940 (cs.LG).
@misc{trulyPPO,
  author        = {Wang, Yuhui and He, Hao and Wen, Chao and Tan, Xiaoyang},
  title         = {Truly Proximal Policy Optimization},
  year          = {2019},
  eprint        = {1903.07940},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}

% Deterministic Policy Gradient paper (ICML 2014, PMLR volume 32).
% The number field was dropped because standard styles reject volume and
% number together for proceedings papers; the city spelling was corrected and
% the month uses the standard macro.
@inproceedings{dpg,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning},
  editor    = {Xing, Eric P. and Jebara, Tony},
  series    = {Proceedings of Machine Learning Research},
  volume    = {32},
  pages     = {387--395},
  month     = jun,
  year      = {2014},
  publisher = {PMLR},
  address   = {Beijing, China},
  pdf       = {http://proceedings.mlr.press/v32/silver14.pdf},
  url       = {http://proceedings.mlr.press/v32/silver14.html},
}

% DDPG paper (ICLR 2016), BibSonomy export.
% The crossref field was removed: its target key (conf/iclr/2016) is not
% defined in the visible part of this file, which makes BibTeX report a bad
% cross reference. The proceedings title is spelled out instead, the empty
% keywords field and the trailing period in the title were dropped, and the
% link is also exposed through the standard url field.
@inproceedings{ddpg,
  author    = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  editor    = {Bengio, Yoshua and LeCun, Yann},
  title     = {Continuous Control with Deep Reinforcement Learning},
  booktitle = {4th International Conference on Learning Representations ({ICLR})},
  year      = {2016},
  url       = {http://arxiv.org/abs/1509.02971},
  ee        = {http://arxiv.org/abs/1509.02971},
  added-at  = {2019-07-12T20:04:55.000+0200},
  timestamp = {2019-07-12T20:04:55.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/22708c349821330660afb992aec2be5d1/lanteunis},
  interhash = {b791167abe535c8525f6a9bf62fcc1ab},
  intrahash = {2708c349821330660afb992aec2be5d1}
}

% TD3 paper (ICML 2018, PMLR volume 80), DBLP export.
@inproceedings{td3,
  author    = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  editor    = {Dy, Jennifer G. and Krause, Andreas},
  title     = {Addressing Function Approximation Error in Actor-Critic Methods},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning, {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July 10-15, 2018},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {1582--1591},
  year      = {2018},
  publisher = {{PMLR}},
  url       = {http://proceedings.mlr.press/v80/fujimoto18a.html},
  timestamp = {Wed, 03 Apr 2019 18:17:30 +0200},
  biburl    = {https://dblp.org/rec/conf/icml/FujimotoHM18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Uhlenbeck and Ornstein (1930) on the theory of Brownian motion.
% The proper adjective in the title is brace-protected so sentence-casing
% styles do not lowercase it, and the journal name is stored in full.
@article{Uhlenbeck30,
  author  = {Uhlenbeck, G. E. and Ornstein, L. S.},
  title   = {On the theory of the {Brownian} motion},
  journal = {Physical Review},
  volume  = {36},
  number  = {3},
  pages   = {823--841},
  year    = {1930},
}

% Deep RL recommendation with explicit user-item interaction modeling,
% arXiv preprint. Stored as a misc entry with eprint fields, like the other
% arXiv entries in this file, instead of a journal called ArXiv with the id
% as volume.
@misc{Liu2018DeepRL,
  author        = {Liu, Feng and Tang, Ruiming and Li, Xutao and Ye, Yunming and Chen, Haokun and Guo, Huifeng and Zhang, Yuzhou},
  title         = {Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling},
  year          = {2018},
  eprint        = {1810.12027},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1810.12027}
}

% Auto-Encoding Variational Bayes, arXiv preprint 1312.6114 (stat.ML).
% The surname in the title is brace-protected so sentence-casing styles keep
% its capital, and the first author's middle initial carries its period.
@misc{vae,
  author        = {Kingma, Diederik P. and Welling, Max},
  title         = {Auto-Encoding Variational {Bayes}},
  year          = {2013},
  eprint        = {1312.6114},
  archivePrefix = {arXiv},
  primaryClass  = {stat.ML}
}

% Policy gradient with function approximation (NIPS'99), ACM DL export.
% Pages use the ASCII double hyphen and the series tag an ASCII apostrophe.
@inproceedings{pg,
  author    = {Sutton, Richard S. and McAllester, David and Singh, Satinder and Mansour, Yishay},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Proceedings of the 12th International Conference on Neural Information Processing Systems},
  series    = {NIPS'99},
  year      = {1999},
  pages     = {1057--1063},
  numpages  = {7},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  location  = {Denver, CO}
}

% Sutton and Barto, Reinforcement Learning: An Introduction (2018 printing).
% The publisher is given as the press, matching the other MIT Press entry in
% this file, rather than by its imprint name.
% NOTE(review): edition = Second is assumed from the 2018 year and ISBN;
% confirm against the copy actually cited.
@book{sutton_book,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  edition   = {Second},
  year      = {2018},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  isbn      = {0262039249}
}

% MovieLens datasets paper (ACM TiiS, vol. 5, no. 4), ACM DL export.
% The camel-case dataset name is brace-protected so sentence-casing styles do
% not lowercase it, and the journal name is stored in full.
@article{ML_1M,
  author     = {Harper, F. Maxwell and Konstan, Joseph A.},
  title      = {The {MovieLens} Datasets: History and Context},
  journal    = {ACM Transactions on Interactive Intelligent Systems},
  volume     = {5},
  number     = {4},
  articleno  = {19},
  numpages   = {19},
  month      = dec,
  year       = {2015},
  issue_date = {January 2016},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {2160-6455},
  doi        = {10.1145/2827872},
  url        = {https://doi.org/10.1145/2827872},
  keywords   = {ratings, Datasets, MovieLens, recommendations}
}

% DRN news recommendation paper, published in the proceedings of the 2018
% World Wide Web Conference. A proceedings paper belongs in an inproceedings
% entry with booktitle, not in an article entry with the proceedings name as
% journal. The acronym in the title is brace-protected.
@inproceedings{first,
  author    = {Zheng, Guanjie and Zhang, Fuzheng and Zheng, Zihan and Xiang, Yang and Yuan, Nicholas Jing and Xie, Xing and Li, Zhenhui},
  title     = {{DRN}: A Deep Reinforcement Learning Framework for News Recommendation},
  booktitle = {Proceedings of the 2018 World Wide Web Conference},
  year      = {2018}
}

% End-to-end deep RL recommendation with supervised embedding (WSDM '20),
% ACM DL export. Pages use the ASCII double hyphen and the series tag an
% ASCII apostrophe.
@inproceedings{latest,
  author    = {Liu, Feng and Guo, Huifeng and Li, Xutao and Tang, Ruiming and Ye, Yunming and He, Xiuqiang},
  title     = {End-to-End Deep Reinforcement Learning Based Recommendation with Supervised Embedding},
  booktitle = {Proceedings of the 13th International Conference on Web Search and Data Mining},
  series    = {WSDM '20},
  year      = {2020},
  pages     = {384--392},
  numpages  = {9},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Houston, TX, USA},
  isbn      = {9781450368223},
  doi       = {10.1145/3336191.3371858},
  url       = {https://doi.org/10.1145/3336191.3371858},
  keywords  = {supervised embedding, recommendation, end-to-end, reinforcement learning}
}
  
% Deep RL in large discrete action spaces, arXiv preprint 1512.07679 (cs.AI).
@misc{wolpertinger,
  author        = {Dulac-Arnold, Gabriel and Evans, Richard and van Hasselt, Hado and Sunehag, Peter and Lillicrap, Timothy and Hunt, Jonathan and Mann, Timothy and Weber, Theophane and Degris, Thomas and Coppin, Ben},
  title         = {Deep Reinforcement Learning in Large Discrete Action Spaces},
  year          = {2015},
  eprint        = {1512.07679},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}

% Catalyst GitHub repository.
% The ampersand in the title is escaped, since a bare one is a LaTeX
% alignment character and aborts compilation of the bibliography; the
% acronyms are brace-protected against sentence-casing styles.
@misc{catalyst,
  author       = {Kolesnikov, Sergey},
  title        = {Accelerated {DL} {R\&D}},
  year         = {2018},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/catalyst-team/catalyst}},
}