% ICML 2022 conference paper. Each author is written as "Last, First" and the
% names are joined with the keyword "and", the only name separator BibTeX accepts.
@inproceedings{beining2022off-policy,
  author    = {Han, Beining and Ren, Zhizhou and Wu, Zuofan and Zhou, Yuan and Peng, Jian},
  title     = {Off-Policy Reinforcement Learning with Delayed Rewards},
  booktitle = {International Conference on Machine Learning ({ICML})},
  year      = {2022},
  url       = {http://archive.ymsc.tsinghua.edu.cn/pacm_paperurl/20221007162024267538732},
}
Beining Han, Zhizhou Ren, Zuofan Wu, Yuan Zhou, and Jian Peng. Off-Policy Reinforcement Learning with Delayed Rewards. 2022. In International Conference on Machine Learning (ICML). http://archive.ymsc.tsinghua.edu.cn/pacm_paperurl/20221007162024267538732.