Bibliography.bib

% Encoding: windows-1252

% ==================================
% Survey
% ==================================

@article{howard1972risk,
  author    = {Howard, Ronald A and Matheson, James E},
  title     = {Risk-sensitive {Markov} decision processes},
  journal   = {Management Science},
  volume    = {18},
  number    = {7},
  pages     = {356--369},
  year      = {1972},
  publisher = {INFORMS},
}

@article{sobel1982variance,
  author    = {Sobel, Matthew J},
  title     = {The variance of discounted {Markov} decision processes},
  journal   = {Journal of Applied Probability},
  volume    = {19},
  number    = {4},
  pages     = {794--802},
  year      = {1982},
  publisher = {Cambridge University Press},
}

@inproceedings{borkar2010risk,
  author       = {Borkar, Vivek and Jain, Rahul},
  title        = {Risk-constrained {Markov} decision processes},
  booktitle    = {49th {IEEE} Conference on Decision and Control ({CDC})},
  pages        = {2664--2669},
  year         = {2010},
  organization = {IEEE},
}

@article{tamar2017sequential,
  author    = {Tamar, Aviv and Chow, Yinlam and Ghavamzadeh, Mohammad and Mannor, Shie},
  title     = {Sequential decision making with coherent risk},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {62},
  number    = {7},
  pages     = {3323--3338},
  year      = {2017},
  publisher = {IEEE},
}

@inproceedings{prashanth2014policy,
  author    = {Prashanth, L. A.},
  title     = {Policy gradients for {CVaR}-constrained {MDPs}},
  booktitle = {International Conference on Algorithmic Learning Theory},
  pages     = {155--169},
  year      = {2014},
  publisher = {Springer},
}

@article{miller2017optimal,
  author    = {Miller, Christopher W and Yang, Insoon},
  title     = {Optimal control of conditional value-at-risk in continuous time},
  journal   = {SIAM Journal on Control and Optimization},
  volume    = {55},
  number    = {2},
  pages     = {856--884},
  year      = {2017},
  publisher = {SIAM},
}

% Doctoral dissertation (the original article-type entry had no journal).
% NOTE(review): confirm the awarding school against the thesis record.
@phdthesis{coraluppi1998optimal,
  author = {Coraluppi, Stefano Paulo},
  title  = {Optimal control of {Markov} decision processes for performance and robustness},
  school = {University of Maryland, College Park},
  year   = {1998},
}

% ==================================
% Distributional RL
% ==================================

@article{bellemare2017distributional,
  author  = {Bellemare, Marc G and Dabney, Will and Munos, R{\'e}mi},
  title   = {A distributional perspective on reinforcement learning},
  journal = {arXiv preprint arXiv:1707.06887},
  year    = {2017},
}

@article{dabney2017distributional,
  author  = {Dabney, Will and Rowland, Mark and Bellemare, Marc G and Munos, R{\'e}mi},
  title   = {Distributional Reinforcement Learning with Quantile Regression},
  journal = {arXiv preprint arXiv:1710.10044},
  year    = {2017},
}

@article{morimura2012parametric,
  author  = {Morimura, Tetsuro and Sugiyama, Masashi and Kashima, Hisashi and Hachiya, Hirotaka and Tanaka, Toshiyuki},
  title   = {Parametric return density estimation for reinforcement learning},
  journal = {arXiv preprint arXiv:1203.3497},
  year    = {2012},
}

@inproceedings{morimura2010nonparametric,
  author    = {Morimura, Tetsuro and Sugiyama, Masashi and Kashima, Hisashi and Hachiya, Hirotaka and Tanaka, Toshiyuki},
  title     = {Nonparametric return distribution approximation for reinforcement learning},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
  pages     = {799--806},
  year      = {2010},
}

% ==================================
% RL + Risk
% ==================================

@inproceedings{chow2015risk,
  author    = {Chow, Yinlam and Tamar, Aviv and Mannor, Shie and Pavone, Marco},
  title     = {Risk-sensitive and robust decision-making: a {CVaR} optimization approach},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1522--1530},
  year      = {2015},
}

@article{bauerle2011markov,
  author    = {B{\"a}uerle, Nicole and Ott, Jonathan},
  title     = {Markov decision processes with average-value-at-risk criteria},
  journal   = {Mathematical Methods of Operations Research},
  volume    = {74},
  number    = {3},
  pages     = {361--379},
  year      = {2011},
  publisher = {Springer},
}

% CVaR policy-gradient and actor-critic algorithms
@inproceedings{chow2014algorithms,
  author    = {Chow, Yinlam and Ghavamzadeh, Mohammad},
  title     = {Algorithms for {CVaR} optimization in {MDPs}},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3509--3517},
  year      = {2014},
}

@article{garcia2015comprehensive,
  author  = {Garc{\'\i}a, Javier and Fern{\'a}ndez, Fernando},
  title   = {A comprehensive survey on safe reinforcement learning},
  journal = {Journal of Machine Learning Research},
  volume  = {16},
  number  = {1},
  pages   = {1437--1480},
  year    = {2015},
}

@inproceedings{tamar2015optimizing,
  author    = {Tamar, Aviv and Glassner, Yonatan and Mannor, Shie},
  title     = {Optimizing the {CVaR} via Sampling},
  booktitle = {Proceedings of the Twenty-Ninth {AAAI} Conference on Artificial Intelligence},
  pages     = {2993--2999},
  year      = {2015},
}

@inproceedings{tamar2015policy,
  author    = {Tamar, Aviv and Chow, Yinlam and Ghavamzadeh, Mohammad and Mannor, Shie},
  title     = {Policy gradient for coherent risk measures},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1468--1476},
  year      = {2015},
}


% ==================================
% General Risk
% ==================================

@article{rockafellar2000optimization,
  author  = {Rockafellar, R Tyrrell and Uryasev, Stanislav},
  title   = {Optimization of conditional value-at-risk},
  journal = {Journal of Risk},
  volume  = {2},
  pages   = {21--42},
  year    = {2000},
}

@article{rockafellar2002conditional,
  author    = {Rockafellar, R Tyrrell and Uryasev, Stanislav},
  title     = {Conditional value-at-risk for general loss distributions},
  journal   = {Journal of Banking \& Finance},
  volume    = {26},
  number    = {7},
  pages     = {1443--1471},
  year      = {2002},
  publisher = {Elsevier},
}

@article{artzner1999coherent,
  author    = {Artzner, Philippe and Delbaen, Freddy and Eber, Jean-Marc and Heath, David},
  title     = {Coherent measures of risk},
  journal   = {Mathematical Finance},
  volume    = {9},
  number    = {3},
  pages     = {203--228},
  year      = {1999},
  publisher = {Wiley Online Library},
}

@article{pflug2016time,
  author    = {Pflug, Georg Ch. and Pichler, Alois},
  title     = {Time-consistent decisions and temporal decomposition of coherent risk functionals},
  journal   = {Mathematics of Operations Research},
  volume    = {41},
  number    = {2},
  pages     = {682--699},
  year      = {2016},
  publisher = {INFORMS},
}

@article{boda2006time,
  author    = {Boda, Kang and Filar, Jerzy A},
  title     = {Time consistent dynamic risk measures},
  journal   = {Mathematical Methods of Operations Research},
  volume    = {63},
  number    = {1},
  pages     = {169--186},
  year      = {2006},
  publisher = {Springer},
}

@article{wipplinger2007philippe,
  author    = {Wipplinger, Evert},
  title     = {{Philippe} {Jorion}: Value at Risk -- The New Benchmark for Managing Financial Risk},
  journal   = {Financial Markets and Portfolio Management},
  volume    = {21},
  number    = {3},
  pages     = {397},
  year      = {2007},
  publisher = {Springer Science \& Business Media},
}

% VaR-CVaR estimation
@incollection{bardou2009recursive,
  author    = {Bardou, Olivier and Frikha, Noufel and Pag{\`e}s, Gilles},
  title     = {Recursive computation of value-at-risk and conditional value-at-risk using {MC} and {QMC}},
  booktitle = {Monte Carlo and Quasi-Monte Carlo Methods 2008},
  pages     = {193--208},
  year      = {2009},
  publisher = {Springer},
}


@article{bernard2015quantile,
  author    = {Bernard, Carole and Vanduffel, Steven},
  title     = {Quantile of a mixture with application to model risk assessment},
  journal   = {Dependence Modeling},
  volume    = {3},
  number    = {1},
  year      = {2015},
  publisher = {De Gruyter Open},
}


% ==================================
% General RL
% ==================================

@book{sutton1998reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  title     = {Reinforcement learning: An introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998},
}

@article{bellman1957markovian,
  author    = {Bellman, Richard},
  title     = {A {Markovian} decision process},
  journal   = {Journal of Mathematics and Mechanics},
  volume    = {6},
  number    = {5},
  pages     = {679--684},
  year      = {1957},
  publisher = {JSTOR},
}

@inproceedings{bertsekas1995neuro,
  author       = {Bertsekas, Dimitri P and Tsitsiklis, John N},
  title        = {Neuro-dynamic programming: an overview},
  booktitle    = {Proceedings of the 34th {IEEE} Conference on Decision and Control},
  volume       = {1},
  pages        = {560--564},
  year         = {1995},
  organization = {IEEE},
}

@inproceedings{sutton2000policy,
  author    = {Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1057--1063},
  year      = {2000},
}

@inproceedings{konda2000actor,
  author    = {Konda, Vijay R and Tsitsiklis, John N},
  title     = {Actor-critic algorithms},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1008--1014},
  year      = {2000},
}

@incollection{baird1995residual,
  author    = {Baird, Leemon},
  title     = {Residual algorithms: Reinforcement learning with function approximation},
  booktitle = {Machine Learning Proceedings 1995},
  pages     = {30--37},
  year      = {1995},
  publisher = {Elsevier},
}

% ==================================
% Q-learning
% ==================================

@article{watkins1992q,
  author    = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title     = {{Q}-learning},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {279--292},
  year      = {1992},
  publisher = {Springer},
}

% extended Q-learning convergence proofs
@article{tsitsiklis1994asynchronous,
  author    = {Tsitsiklis, John N},
  title     = {Asynchronous stochastic approximation and {Q}-learning},
  journal   = {Machine Learning},
  volume    = {16},
  number    = {3},
  pages     = {185--202},
  year      = {1994},
  publisher = {Springer},
}

% Q-learning as a random process
@inproceedings{jaakkola1994convergence,
  author    = {Jaakkola, Tommi and Jordan, Michael I and Singh, Satinder P},
  title     = {Convergence of stochastic iterative dynamic programming algorithms},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {703--710},
  year      = {1994},
}

% ==================================
% Theory
% ==================================

@book{boyd2004convex,
  author    = {Boyd, Stephen and Vandenberghe, Lieven},
  title     = {Convex optimization},
  publisher = {Cambridge University Press},
  year      = {2004},
}

@book{kreyszig1989introductory,
  author    = {Kreyszig, Erwin},
  title     = {Introductory functional analysis with applications},
  publisher = {Wiley},
  address   = {New York},
  year      = {1989},
}

@book{kullback1997information,
  author    = {Kullback, Solomon},
  title     = {Information theory and statistics},
  publisher = {Courier Corporation},
  year      = {1997},
}

% Wasserstein metric
@article{bickel1981some,
  author    = {Bickel, Peter J and Freedman, David A},
  title     = {Some asymptotic theory for the bootstrap},
  journal   = {The Annals of Statistics},
  volume    = {9},
  number    = {6},
  pages     = {1196--1217},
  year      = {1981},
  publisher = {JSTOR},
}

@article{koenker2001quantile,
  author  = {Koenker, Roger and Hallock, Kevin F},
  title   = {Quantile regression},
  journal = {Journal of Economic Perspectives},
  volume  = {15},
  number  = {4},
  pages   = {143--156},
  year    = {2001},
}

@article{arjovsky2017wasserstein,
  author  = {Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on},
  title   = {{Wasserstein} {GAN}},
  journal = {arXiv preprint arXiv:1701.07875},
  year    = {2017},
}

@article{bellemare2017cramer,
  author  = {Bellemare, Marc G and Danihelka, Ivo and Dabney, Will and Mohamed, Shakir and Lakshminarayanan, Balaji and Hoyer, Stephan and Munos, R{\'e}mi},
  title   = {The {Cramer} distance as a solution to biased {Wasserstein} gradients},
  journal = {arXiv preprint arXiv:1705.10743},
  year    = {2017},
}

@book{billingsley2008probability,
  author    = {Billingsley, Patrick},
  title     = {Probability and measure},
  publisher = {John Wiley \& Sons},
  year      = {2008},
}

@article{robbins1951stochastic,
  author    = {Robbins, Herbert and Monro, Sutton},
  title     = {A stochastic approximation method},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {22},
  number    = {3},
  pages     = {400--407},
  year      = {1951},
  publisher = {JSTOR},
}


% ==================================
% Deep Learning
% ==================================

@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep learning},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2016},
}

@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
}

@article{ruder2016overview,
  author  = {Ruder, Sebastian},
  title   = {An overview of gradient descent optimization algorithms},
  journal = {arXiv preprint arXiv:1609.04747},
  year    = {2016},
}

@article{tieleman2012lecture,
  author  = {Tieleman, Tijmen and Hinton, Geoffrey},
  title   = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  journal = {COURSERA: Neural networks for machine learning},
  volume  = {4},
  number  = {2},
  pages   = {26--31},
  year    = {2012},
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1097--1105},
  year      = {2012},
}

@inproceedings{jarrett2009best,
  author       = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann},
  title        = {What is the best multi-stage architecture for object recognition?},
  booktitle    = {2009 {IEEE} 12th International Conference on Computer Vision},
  pages        = {2146--2153},
  year         = {2009},
  organization = {IEEE},
}

@inproceedings{abadi2016tensorflow,
  author    = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others},
  title     = {{TensorFlow}: A System for Large-Scale Machine Learning},
  booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16)},
  pages     = {265--283},
  year      = {2016},
}


% ==================================
% Deep Q-learning
% ==================================

@article{silver2017mastering,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of {Go} without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
}

@article{mnih2015human,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
}

@article{bellemare13arcade,
  author  = {Bellemare, Marc G and Naddaf, Yavar and Veness, Joel and Bowling, Michael},
  title   = {The {Arcade Learning Environment}: An Evaluation Platform for General Agents},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {47},
  pages   = {253--279},
  month   = jun,
  year    = {2013},
}

@article{schaul2015prioritized,
  author  = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  title   = {Prioritized experience replay},
  journal = {arXiv preprint arXiv:1511.05952},
  year    = {2015},
}

@article{wang2015dueling,
  author  = {Wang, Ziyu and Schaul, Tom and Hessel, Matteo and Van Hasselt, Hado and Lanctot, Marc and De Freitas, Nando},
  title   = {Dueling network architectures for deep reinforcement learning},
  journal = {arXiv preprint arXiv:1511.06581},
  year    = {2015},
}

@article{hessel2017rainbow,
  author  = {Hessel, Matteo and Modayil, Joseph and Van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
  title   = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
  journal = {arXiv preprint arXiv:1710.02298},
  year    = {2017},
}

@misc{baselines,
  title        = {OpenAI Baselines},
  author       = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai},
  howpublished = {\url{https://github.com/openai/baselines}},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  year         = {2017},
}

@article{plappert2017parameter,
  author  = {Plappert, Matthias and Houthooft, Rein and Dhariwal, Prafulla and Sidor, Szymon and Chen, Richard Y and Chen, Xi and Asfour, Tamim and Abbeel, Pieter and Andrychowicz, Marcin},
  title   = {Parameter space noise for exploration},
  journal = {arXiv preprint arXiv:1706.01905},
  year    = {2017},
}
% ==================================
% Motivation
% ==================================

@article{majumdar2017should,
  author  = {Majumdar, Anirudha and Pavone, Marco},
  title   = {How Should a Robot Assess Risk? Towards an Axiomatic Theory of Risk in Robotics},
  journal = {arXiv preprint arXiv:1710.11040},
  year    = {2017},
}

@article{leike2017ai,
  author  = {Leike, Jan and Martic, Miljan and Krakovna, Victoria and Ortega, Pedro A and Everitt, Tom and Lefrancq, Andrew and Orseau, Laurent and Legg, Shane},
  title   = {{AI} Safety Gridworlds},
  journal = {arXiv preprint arXiv:1711.09883},
  year    = {2017},
}

@article{amodei2016concrete,
  author  = {Amodei, Dario and Olah, Chris and Steinhardt, Jacob and Christiano, Paul and Schulman, John and Man{\'e}, Dan},
  title   = {Concrete problems in {AI} safety},
  journal = {arXiv preprint arXiv:1606.06565},
  year    = {2016},
}

@article{shapiro2013kusuoka,
  author    = {Shapiro, Alexander},
  title     = {On {Kusuoka} representation of law invariant risk measures},
  journal   = {Mathematics of Operations Research},
  volume    = {38},
  number    = {1},
  pages     = {142--152},
  year      = {2013},
  publisher = {INFORMS},
}

% risk-averse neuroscience
@article{shen2014risk,
  author    = {Shen, Yun and Tobia, Michael J and Sommer, Tobias and Obermayer, Klaus},
  title     = {Risk-sensitive reinforcement learning},
  journal   = {Neural Computation},
  volume    = {26},
  number    = {7},
  pages     = {1298--1328},
  year      = {2014},
  publisher = {MIT Press},
}

% Institutional report: the corporate author is double-braced so BibTeX
% treats it as one indivisible name instead of splitting it into first/last.
@techreport{basel2013fundamental,
  author      = {{Basel Committee on Banking Supervision}},
  title       = {Fundamental review of the trading book: A revised market risk framework},
  type        = {Consultative Document},
  institution = {Bank for International Settlements},
  month       = oct,
  year        = {2013},
}

@article{bahdanau2014neural,
  author  = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title   = {Neural machine translation by jointly learning to align and translate},
  journal = {arXiv preprint arXiv:1409.0473},
  year    = {2014},
}