% Use this file for citations not found in the ACL Anthology (contained in "anthology.bib").
@book{Aho:72,
author = {Alfred V. Aho and Jeffrey D. Ullman},
title = {The Theory of Parsing, Translation and Compiling},
year = "1972",
volume = "1",
publisher = {Prentice-Hall},
address = {Englewood Cliffs, NJ}
}
@book{APA:83,
author = {{American Psychological Association}},
title = {Publications Manual},
year = "1983",
publisher = {American Psychological Association},
address = {Washington, DC}
}
@article{Chandra:81,
author = {Ashok K. Chandra and Dexter C. Kozen and Larry J. Stockmeyer},
year = "1981",
title = {Alternation},
journal = {Journal of the Association for Computing Machinery},
volume = "28",
number = "1",
pages = "114--133",
doi = "10.1145/322234.322243",
}
@inproceedings{andrew2007scalable,
title={Scalable training of {$L_1$}-regularized log-linear models},
author={Andrew, Galen and Gao, Jianfeng},
booktitle={Proceedings of the 24th International Conference on Machine Learning},
pages={33--40},
year={2007},
url={https://dl.acm.org/doi/abs/10.1145/1273496.1273501}
}
@book{Gusfield:97,
author = {Dan Gusfield},
title = {Algorithms on Strings, Trees and Sequences},
year = "1997",
publisher = {Cambridge University Press},
address = {Cambridge, UK},
url={https://www.cambridge.org/core/books/algorithms-on-strings-trees-and-sequences/F0B095049C7E6EF5356F0A26686C20D3}
}
@article{rasooli-tetrault-2015,
author = {Mohammad Sadegh Rasooli and Joel R. Tetreault},
title = {Yara Parser: {A} Fast and Accurate Dependency Parser},
journal = {Computing Research Repository},
volume = {arXiv:1503.06733},
year = {2015},
url = {https://arxiv.org/abs/1503.06733},
note = {version 2}
}
@article{Ando2005,
acmid = {1194905},
author = {Ando, Rie Kubota and Zhang, Tong},
issn = {1532-4435},
issue_date = {12/1/2005},
journal = {Journal of Machine Learning Research},
month = dec,
numpages = {37},
pages = {1817--1853},
publisher = {JMLR.org},
title = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
volume = {6},
year = {2005},
url={https://www.jmlr.org/papers/volume6/ando05a/ando05a.pdf}
}
@article{ct1965,
title={An algorithm for the machine calculation of complex {F}ourier series},
author={Cooley, James W. and Tukey, John W.},
journal={Mathematics of Computation},
volume={19},
number={90},
pages={297--301},
year={1965},
url={https://www.ams.org/journals/mcom/1965-19-090/S0025-5718-1965-0178586-1/S0025-5718-1965-0178586-1.pdf}
}
@inproceedings{averitec2024,
author = {Schlichtkrull, Michael and Guo, Zhijiang and Vlachos, Andreas},
title = {{AVERITEC}: a dataset for real-world claim verification with evidence from the web},
year = {2024},
publisher = {Curran Associates Inc.},
address = {Red Hook, NY, USA},
abstract = {Existing datasets for automated fact-checking have substantial limitations, such as relying on artificial claims, lacking annotations for evidence and intermediate reasoning, or including evidence published after the claim. In this paper, we introduce AVERITEC, a new dataset of 4,568 real-world claims covering fact-checks by 50 different organizations. Each claim is annotated with question-answer pairs supported by evidence available online, as well as textual justifications explaining how the evidence combines to produce a verdict. Through a multi-round annotation process, we avoid common pitfalls including context dependence, evidence insufficiency, and temporal leakage, and reach a substantial inter-annotator agreement of κ = 0.619 on verdicts. We develop a baseline as well as an evaluation scheme for verifying claims through question-answering against the open web.},
booktitle = {Proceedings of the 37th International Conference on Neural Information Processing Systems},
articleno = {2842},
numpages = {40},
location = {New Orleans, LA, USA},
series = {NIPS '23},
url = {https://dl.acm.org/doi/10.5555/3666122.3668964}
}
@misc{he2023debertav3improvingdebertausing,
title={DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing},
author={Pengcheng He and Jianfeng Gao and Weizhu Chen},
year={2023},
eprint={2111.09543},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2111.09543},
}
@misc{jaradat2024contextawaredetectioncherrypickingnews,
title={On Context-aware Detection of Cherry-picking in News Reporting},
author={Israa Jaradat and Haiqi Zhang and Chengkai Li},
year={2024},
eprint={2401.05650},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2401.05650},
}
@misc{openai2024gpt4o,
title={Hello GPT-4o},
author={OpenAI},
year={2024},
month={May},
day={13},
url={https://openai.com/index/hello-gpt-4o/},
note = {Accessed: 9 August 2024}
}
@misc{anthropic2024claude35sonnet,
title={Introducing Claude 3.5 Sonnet},
url={https://www.anthropic.com/news/claude-3-5-sonnet},
author={Anthropic},
publisher={Anthropic},
year={2024},
month={Jun},
note = {Accessed: 9 August 2024}
}
@misc{meta2024llama31,
title={Introducing Llama 3.1: Our most capable models to date},
url={https://ai.meta.com/blog/meta-llama-3-1/},
author={Meta},
publisher={Meta},
year={2024},
month={July},
}
@misc{cohere2024commandr,
title={Command R: Retrieval-Augmented Generation at Production Scale},
url={https://cohere.com/blog/command-r},
author={Cohere},
publisher={Cohere},
year={2024},
month={Mar},
}
@misc{zhang2024carefulexaminationlargelanguage,
title={A Careful Examination of Large Language Model Performance on Grade School Arithmetic},
author={Hugh Zhang and Jeff Da and Dean Lee and Vaughn Robinson and Catherine Wu and Will Song and Tiffany Zhao and Pranav Raja and Dylan Slack and Qin Lyu and Sean Hendryx and Russell Kaplan and Michele Lunati and Summer Yue},
year={2024},
eprint={2405.00332},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2405.00332},
}
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011},
url={https://jmlr.org/papers/v12/pedregosa11a.html}
}
@ARTICLE{2020SciPy-NMeth,
author = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
Haberland, Matt and Reddy, Tyler and Cournapeau, David and
Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
Kern, Robert and Larson, Eric and Carey, C J and
Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
{VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
Harris, Charles R. and Archibald, Anne M. and
Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
{van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
title = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
Computing in Python}},
journal = {Nature Methods},
year = {2020},
volume = {17},
pages = {261--272},
adsurl = {https://rdcu.be/b08Wh},
doi = {10.1038/s41592-019-0686-2},
}
@misc{emb2024mxbai,
title={Open Source Strikes Bread - New Fluffy Embeddings Model},
author={Sean Lee and Aamir Shakir and Darius Koenig and Julius Lipp},
year={2024},
url={https://www.mixedbread.ai/blog/mxbai-embed-large-v1},
}
@article{douze2024faiss,
title={The {Faiss} library},
author={Matthijs Douze and Alexandr Guzhva and Chengqi Deng and Jeff Johnson and Gergely Szilvasy and Pierre-Emmanuel Mazaré and Maria Lomeli and Lucas Hosseini and Hervé Jégou},
year={2024},
eprint={2401.08281},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@article{johnson2019billion,
title={Billion-scale similarity search with {GPUs}},
author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
journal={IEEE Transactions on Big Data},
volume={7},
number={3},
pages={535--547},
year={2019},
publisher={IEEE},
doi={10.1109/TBDATA.2019.2921572}
}
@inproceedings{carbonell-mmr,
author = {Carbonell, Jaime and Goldstein, Jade},
title = {The use of {MMR}, diversity-based reranking for reordering documents and producing summaries},
year = {1998},
isbn = {1581130155},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/290941.291025},
doi = {10.1145/290941.291025},
booktitle = {Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {335--336},
numpages = {2},
location = {Melbourne, Australia},
series = {SIGIR '98}
}
@inproceedings{vllm,
author = {Kwon, Woosuk and Li, Zhuohan and Zhuang, Siyuan and Sheng, Ying and Zheng, Lianmin and Yu, Cody Hao and Gonzalez, Joseph and Zhang, Hao and Stoica, Ion},
title = {Efficient Memory Management for Large Language Model Serving with {PagedAttention}},
year = {2023},
isbn = {9798400702297},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3600006.3613165},
doi = {10.1145/3600006.3613165},
abstract = {High throughput serving of large language models (LLMs) requires batching sufficiently many requests at a time. However, existing systems struggle because the key-value cache (KV cache) memory for each request is huge and grows and shrinks dynamically. When managed inefficiently, this memory can be significantly wasted by fragmentation and redundant duplication, limiting the batch size. To address this problem, we propose PagedAttention, an attention algorithm inspired by the classical virtual memory and paging techniques in operating systems. On top of it, we build vLLM, an LLM serving system that achieves (1) near-zero waste in KV cache memory and (2) flexible sharing of KV cache within and across requests to further reduce memory usage. Our evaluations show that vLLM improves the throughput of popular LLMs by 2--4\texttimes{} with the same level of latency compared to the state-of-the-art systems, such as FasterTransformer and Orca. The improvement is more pronounced with longer sequences, larger models, and more complex decoding algorithms. vLLM's source code is publicly available at https://github.com/vllm-project/vllm.},
booktitle = {Proceedings of the 29th Symposium on Operating Systems Principles},
pages = {611--626},
numpages = {16},
location = {Koblenz, Germany},
series = {SOSP '23}
}
@inproceedings{bm25,
author = {Robertson, Stephen and Walker, S. and Jones, S. and Hancock-Beaulieu, M. M. and Gatford, M.},
title = {Okapi at {TREC}-3},
booktitle = {Overview of the Third Text REtrieval Conference (TREC-3)},
year = {1995},
month = {January}
}
@misc{golkar2023xvalcontinuousnumberencoding,
title={xVal: A Continuous Number Encoding for Large Language Models},
author={Siavash Golkar and Mariel Pettee and Michael Eickenberg and Alberto Bietti and Miles Cranmer and Geraud Krawezik and Francois Lanusse and Michael McCabe and Ruben Ohana and Liam Parker and Bruno Régaldo-Saint Blancard and Tiberiu Tesileanu and Kyunghyun Cho and Shirley Ho},
year={2023},
eprint={2310.02989},
archivePrefix={arXiv},
primaryClass={stat.ML},
url={https://arxiv.org/abs/2310.02989},
}
@article{likert1932technique,
title={A technique for the measurement of attitudes},
author={Likert, Rensis},
journal={Archives of Psychology},
volume={22},
number={140},
pages={55},
year={1932}
}
@article{likertstudy,
author = {Joshi, Ankur and Kale, Saket and Chandel, Satish and Pal, Dinesh},
year = {2015},
month = {01},
pages = {396--403},
title = {Likert Scale: Explored and Explained},
volume = {7},
journal = {British Journal of Applied Science \& Technology},
doi = {10.9734/BJAST/2015/14975}
}
@inproceedings{NIPS1989_0336dcba,
author = {Bridle, John},
booktitle = {Advances in Neural Information Processing Systems},
editor = {D. Touretzky},
publisher = {Morgan-Kaufmann},
title = {Training Stochastic Model Recognition Algorithms as Networks can Lead to Maximum Mutual Information Estimation of Parameters},
url = {https://proceedings.neurips.cc/paper_files/paper/1989/file/0336dcbab05b9d5ad24f4333c7658a0e-Paper.pdf},
volume = {2},
year = {1989}
}
@inproceedings{rag,
author = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K\"{u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt\"{a}schel, Tim and Riedel, Sebastian and Kiela, Douwe},
title = {Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
year = {2020},
isbn = {9781713829546},
publisher = {Curran Associates Inc.},
address = {Red Hook, NY, USA},
abstract = {Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited, and hence on knowledge-intensive tasks, their performance lags behind task-specific architectures. Additionally, providing provenance for their decisions and updating their world knowledge remain open research problems. Pre-trained models with a differentiable access mechanism to explicit non-parametric memory can overcome this issue, but have so far been only investigated for extractive downstream tasks. We explore a general-purpose fine-tuning recipe for retrieval-augmented generation (RAG) — models which combine pre-trained parametric and non-parametric memory for language generation. We introduce RAG models where the parametric memory is a pre-trained seq2seq model and the non-parametric memory is a dense vector index of Wikipedia, accessed with a pre-trained neural retriever. We compare two RAG formulations, one which conditions on the same retrieved passages across the whole generated sequence, and another which can use different passages per token. We fine-tune and evaluate our models on a wide range of knowledge-intensive NLP tasks and set the state of the art on three open domain QA tasks, outperforming parametric seq2seq models and task-specific retrieve-and-extract architectures. For language generation tasks, we find that RAG models generate more specific, diverse and factual language than a state-of-the-art parametric-only seq2seq baseline.},
booktitle = {Proceedings of the 34th International Conference on Neural Information Processing Systems},
articleno = {793},
numpages = {16},
location = {Vancouver, BC, Canada},
series = {NIPS '20},
url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/6b493230205f780e1bc26945df7481e5-Paper.pdf},
}
@software{Chase_LangChain_2022,
author = {Chase, Harrison},
month = oct,
title = {{LangChain}},
url = {https://github.com/hwchase17/langchain},
year = {2022}
}
@software{Pietsch2020haystack,
author = {Matthias Pietsch and Timo Soni and Brendan Chan and Timo Möller and Boro Kostić},
title = {Haystack (Version 2.0)},
year = 2020,
publisher = {GitHub},
howpublished = {\url{https://github.com/deepset-ai/haystack/}},
}
@inproceedings{cot,
author = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed H. and Le, Quoc V. and Zhou, Denny},
title = {Chain-of-thought prompting elicits reasoning in large language models},
year = {2022},
isbn = {9781713871088},
publisher = {Curran Associates Inc.},
address = {Red Hook, NY, USA},
abstract = {We explore how generating a chain of thought—a series of intermediate reasoning steps—significantly improves the ability of large language models to perform complex reasoning. In particular, we show how such reasoning abilities emerge naturally in sufficiently large language models via a simple method called chain-of-thought prompting, where a few chain of thought demonstrations are provided as exemplars in prompting.Experiments on three large language models show that chain-of-thought prompting improves performance on a range of arithmetic, commonsense, and symbolic reasoning tasks. The empirical gains can be striking. For instance, prompting a PaLM 540B with just eight chain-of-thought exemplars achieves state-of-the-art accuracy on the GSM8K benchmark of math word problems, surpassing even finetuned GPT-3 with a verifier.},
booktitle = {Proceedings of the 36th International Conference on Neural Information Processing Systems},
articleno = {1800},
numpages = {14},
location = {New Orleans, LA, USA},
series = {NIPS '22},
url = {https://dl.acm.org/doi/10.5555/3600270.3602070}
}
@inproceedings{fewshot,
author = {Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel M. and Wu, Jeffrey and Winter, Clemens and Hesse, Christopher and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
title = {Language models are few-shot learners},
year = {2020},
isbn = {9781713829546},
publisher = {Curran Associates Inc.},
address = {Red Hook, NY, USA},
abstract = {We demonstrate that scaling up language models greatly improves task-agnostic, few-shot performance, sometimes even becoming competitive with prior state-of-the-art fine-tuning approaches. Specifically, we train GPT-3, an autoregressive language model with 175 billion parameters, 10x more than any previous non-sparse language model, and test its performance in the few-shot setting. For all tasks, GPT-3 is applied without any gradient updates or fine-tuning, with tasks and few-shot demonstrations specified purely via text interaction with the model. GPT-3 achieves strong performance on many NLP datasets, including translation, question-answering, and cloze tasks. We also identify some datasets where GPT-3's few-shot learning still struggles, as well as some datasets where GPT-3 faces methodological issues related to training on large web corpora.},
booktitle = {Proceedings of the 34th International Conference on Neural Information Processing Systems},
articleno = {159},
numpages = {25},
location = {Vancouver, BC, Canada},
series = {NIPS '20},
url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
}
@misc{tokens,
author = {OpenAI},
title = {What are tokens and how to count them?},
url={https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them#},
year = {2023},
note = {Accessed: 15 August 2024},
}
@misc{json,
author = {OpenAI},
title = {Introducing structured outputs in the API},
url={https://openai.com/index/introducing-structured-outputs-in-the-api/#},
year = {2024},
note = {Accessed: 15 August 2024},
}
@misc{menick2022teachinglanguagemodelssupport,
title={Teaching language models to support answers with verified quotes},
author={Jacob Menick and Maja Trebacz and Vladimir Mikulik and John Aslanides and Francis Song and Martin Chadwick and Mia Glaese and Susannah Young and Lucy Campbell-Gillingham and Geoffrey Irving and Nat McAleese},
year={2022},
eprint={2203.11147},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2203.11147},
}
@Article{Ullrich2023,
author={Ullrich, Herbert
and Drchal, Jan
and R{\'y}par, Martin
and Vincourov{\'a}, Hana
and Moravec, V{\'a}clav},
title={{CsFEVER} and {CTKFacts}: acquiring Czech data for fact verification},
journal={Language Resources and Evaluation},
year={2023},
month={Dec},
day={01},
volume={57},
number={4},
pages={1571--1605},
abstract={In this paper, we examine several methods of acquiring Czech data for automated fact-checking, which is a task commonly modeled as a classification of textual claim veracity w.r.t. a corpus of trusted ground truths. We attempt to collect sets of data in form of a factual claim, evidence within the ground truth corpus, and its veracity label (supported, refuted or not enough info). As a first attempt, we generate a Czech version of the large-scale FEVER dataset built on top of Wikipedia corpus. We take a hybrid approach of machine translation and document alignment; the approach and the tools we provide can be easily applied to other languages. We discuss its weaknesses, propose a future strategy for their mitigation and publish the 127k resulting translations, as well as a version of such dataset reliably applicable for the Natural Language Inference task---the CsFEVER-NLI. Furthermore, we collect a novel dataset of 3,097 claims, which is annotated using the corpus of 2.2 M articles of Czech News Agency. We present an extended dataset annotation methodology based on the FEVER approach, and, as the underlying corpus is proprietary, we also publish a standalone version of the dataset for the task of Natural Language Inference we call CTKFactsNLI. We analyze both acquired datasets for spurious cues---annotation patterns leading to model overfitting. CTKFacts is further examined for inter-annotator agreement, thoroughly cleaned, and a typology of common annotator errors is extracted. Finally, we provide baseline models for all stages of the fact-checking pipeline and publish the NLI datasets, as well as our annotation platform and other experimental data.},
issn={1574-0218},
doi={10.1007/s10579-023-09654-3},
url={https://doi.org/10.1007/s10579-023-09654-3}
}
@misc{drchal2023pipelinedatasetgenerationautomated,
title={Pipeline and Dataset Generation for Automated Fact-checking in Almost Any Language},
author={Jan Drchal and Herbert Ullrich and Tomáš Mlynář and Václav Moravec},
year={2023},
eprint={2312.10171},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2312.10171},
}
@misc{dejean2024thoroughcomparisoncrossencodersllms,
title={A Thorough Comparison of Cross-Encoders and LLMs for Reranking SPLADE},
author={Hervé Déjean and Stéphane Clinchant and Thibault Formal},
year={2024},
eprint={2403.10407},
archivePrefix={arXiv},
primaryClass={cs.IR},
url={https://arxiv.org/abs/2403.10407},
}
@misc{tang2024strucbenchlargelanguagemodels,
title={Struc-Bench: Are Large Language Models Really Good at Generating Complex Structured Data?},
author={Xiangru Tang and Yiming Zong and Jason Phang and Yilun Zhao and Wangchunshu Zhou and Arman Cohan and Mark Gerstein},
year={2024},
eprint={2309.08963},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2309.08963},
}
@misc{dubey2024llama3herdmodels,
title={The Llama 3 Herd of Models},
author={Abhimanyu Dubey and Abhinav Jauhri and Abhinav Pandey and Abhishek Kadian and Ahmad Al-Dahle and Aiesha Letman and Akhil Mathur and Alan Schelten and Amy Yang and Angela Fan and Anirudh Goyal and Anthony Hartshorn and Aobo Yang and ... and Zhiwei Zhao},
year={2024},
eprint={2407.21783},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2407.21783},
}