Citation
@comment{arXiv preprint (see eprint/archivePrefix): typed as misc because no journal is recorded for this entry.}
@misc{timor2025acceleratingllminferencelossless,
  author        = {Timor, Nadav and Mamou, Jonathan and Korat, Daniel and Berchansky, Moshe and Pereg, Oren and Jain, Gaurav and Schwartz, Roy and Wasserblat, Moshe and Harel, David},
  title         = {Accelerating {LLM} Inference with Lossless Speculative Decoding Algorithms for Heterogeneous Vocabularies},
  year          = {2025},
  eprint        = {2502.05202},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2502.05202},
}