Documents

ParaLLeM excels at processing documents at high throughput.

By switching to batch mode, you save 50% on token costs, save CPU time, and can scale to thousands of documents.

In this example, we count the syllables of every word in a list of 100k words.

Warning

The following example makes 100k (!) LLM requests. It costs ~$0.85 to complete with OpenAI. It passes "max_output_tokens=20" to limit costs, but this keyword is specific to OpenAI! Make sure to adjust the kwargs for other providers. Leaving reasoning unconstrained can lead to costs of >$100. Use other providers at your own risk.

examples/stress/stress_test.py

# Stress test: use GPT to count syllables for >100k words
import time
from tqdm import tqdm
import polars as pl

import parallem as pllm

# Read the word list as a headerless CSV whose column is named "word", then
# trim leading/trailing whitespace from every entry.
words_path = "examples/stress/txts/words_100k.txt"
df = pl.read_csv(words_path, has_header=False, new_columns=["word"])
df = df.with_columns(pl.col("word").str.strip_chars())


def syllable_count_agent(agt: pllm.AgentContext, word: str):
    """Ask the LLM how many syllables ``word`` has.

    Args:
        agt: Agent context used to issue the request via ``ask_llm``.
        word: The word to be counted; interpolated verbatim into the prompt.

    Returns:
        A ``(count, raw_output)`` pair. When the stripped reply parses as an
        integer the result is ``(count, None)``; otherwise it is
        ``(None, stripped_reply)`` so the unparsable text can be inspected.
    """
    prompt = f'How many syllables are in "{word}"? Only return the number, no explanation.'
    # Minimal reasoning effort and a small output cap keep the per-request
    # cost low across the whole word list.
    response = agt.ask_llm(
        prompt,
        reasoning={"effort": "minimal"},
        max_output_tokens=20,
    )
    answer = response.final_answer.strip()
    try:
        parsed = int(answer)
    except ValueError:
        # Not a bare integer: hand back the raw text instead of a count.
        return None, answer
    return parsed, None


# Run the agent on all words
# The orchestrator targets OpenAI's gpt-5-nano using the "batch" strategy.
# NOTE(review): "batch_max_size" presumably caps the number of requests per
# submitted batch, and load_dotenv presumably reads credentials from a .env
# file -- confirm against the ParaLLeM documentation.
orch = pllm.resume_directory(
    ".pllm/stress/fresh3/stress_test",
    llm="gpt-5-nano",
    provider="openai",
    strategy="batch",
    tweaks={
        "batch_max_size": 10000,
    },
    load_dotenv=True,
)

# One agent context per word, named after the word. Each collected row is
# (word, syllable_count, raw_output), matching the DataFrame schema below.
collector = []
for (word,) in tqdm(df.iter_rows(), total=df.height):
    with orch.agent(f"syllable_count_{word}") as agt:
        count = syllable_count_agent(agt, word)
        collector.append((word, *count))

# Time submission
# Only finalize_and_persist() is timed. perf_counter() is monotonic, so the
# reported duration cannot be skewed by wall-clock adjustments (NTP, manual
# changes) that may occur during a long-running batch.
start_time = time.perf_counter()
orch.finalize_and_persist()
end_time = time.perf_counter()
print(f"Took {end_time - start_time:.2f} seconds to submit/retrieve batch.")

# Exactly one of syllable_count / raw_output is populated per row: the count
# when the reply parsed as an integer, the raw reply text otherwise.
df = pl.DataFrame(
    collector,
    schema={
        "word": pl.Utf8,
        "syllable_count": pl.Int64,
        "raw_output": pl.Utf8,
    },
    orient="row",
)
print(df)