BibTeX
@article{zhao2025genprm,
  author        = {Zhao, Jian and Liu, Runze and Zhang, Kaiyan and Zhou, Zhimu and Gao, Junqi and Li, Dong and Lyu, Jiafei and Qian, Zhouyi and Qi, Biqing and Li, Xiu and Zhou, Bowen},
  title         = {{GenPRM}: Scaling Test-Time Compute of Process Reward Models via Generative Reasoning},
  journal       = {arXiv preprint arXiv:2504.00891},
  year          = {2025},
  eprint        = {2504.00891},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2504.00891},
}
Our collection of PRMs in
Awesome-Process-Reward-Models:
@misc{Awesome-Process-Reward-Models,
  author       = {Liu, Runze and Zhao, Jian and Zhang, Kaiyan and Zhou, Zhimu and Gao, Junqi and Li, Dong and Lyu, Jiafei and Qian, Zhouyi and Qi, Biqing and Li, Xiu and Zhou, Bowen},
  title        = {Awesome Process Reward Models},
  howpublished = {\url{https://github.com/RyanLiu112/Awesome-Process-Reward-Models}},
  year         = 2025,
  note         = {GitHub repository},
}
Our recent work on LLM test-time scaling with PRMs:
@article{liu2025can,
  author        = {Liu, Runze and Gao, Junqi and Zhao, Jian and Zhang, Kaiyan and Li, Xiu and Qi, Biqing and Ouyang, Wanli and Zhou, Bowen},
  title         = {Can {1B} {LLM} Surpass {405B} {LLM}? {Rethinking} Compute-Optimal Test-Time Scaling},
  journal       = {arXiv preprint arXiv:2502.06703},
  year          = {2025},
  eprint        = {2502.06703},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2502.06703},
}