1M requests

ParaLLeM scales up to millions of requests. This makes it excellent for large data pipelines.

Warning

The following example makes 1M (!) LLM requests. It costs ~$5 to complete with OpenAI. To save costs, it requests minimal reasoning effort and sets "max_output_tokens=20", but these kwargs are specific to OpenAI! Make sure to adjust the kwargs for other providers. Not constraining reasoning can lead to costs of >$100. Use other providers at your own risk.

# Stress test: 1 million

from tqdm import tqdm

import parallem as pllm
import polars as pl
from datasets import load_dataset

# Hugging Face dataset of book titles and descriptions; only the "train" split is used.
DATASET_ID = "Skelebor/book_titles_and_descriptions_en_clean"
ds = load_dataset(DATASET_ID, split="train")
# One LLM request is made per row, so this is the total request count
# (1,032,335 rows when this example was written).
print(ds.num_rows)
# NOTE: the full run is estimated to cost about $5.


def genre_agent(agt: pllm.AgentContext, title: str):
    """Ask the LLM to guess a book's genre from its title alone.

    Args:
        agt: Agent context whose ``ask_llm`` method issues the request.
        title: Book title sent to the model together with the instruction.

    Returns:
        The response's ``final_answer`` with surrounding whitespace stripped.
    """
    instruction = "Based on title alone, guess this book's genre. Return just the genre, no explanation needed."
    # Cost controls: minimal reasoning effort and a 20-token output cap.
    # These kwargs target OpenAI; adjust them for other providers.
    cost_controls = {
        "reasoning": {"effort": "minimal"},
        "max_output_tokens": 20,
    }
    response = agt.ask_llm(instruction, title, **cost_controls)
    answer = response.final_answer
    return answer.strip()


# Rows of (title, guessed genre), appended each time an agent body completes.
collector = []
batch_tweaks = {
    "batch_max_size": 10000,
}
with pllm.resume_directory(
    ".pllm/stress/stress_1m_v1",
    llm="gpt-5-nano",
    load_dotenv=True,
    strategy="batch",
    tweaks=batch_tweaks,
) as orch:
    # enumerate() hides the dataset length from tqdm, so pass the total explicitly.
    progress = tqdm(enumerate(ds), total=len(ds))
    for row_index, row in progress:
        # The row index is passed to orch.agent() as this agent's identifier.
        with orch.agent(row_index) as agt:
            book_title = row["title"]
            genre = genre_agent(agt, book_title)
            collector.append((book_title, genre))

# Build the two-column result table from the collected row tuples and show it.
result_schema = {"title": pl.Utf8, "genre": pl.Utf8}
genre_table = pl.DataFrame(collector, schema=result_schema, orient="row")
print(genre_table)
#