Documents
ParaLLeM excels at processing documents at high throughput.
By switching to batch mode, you save 50% on token costs, save CPU time, and can scale up to thousands of documents.
In this example, we count the syllables of each word in a list of 100k words.
Warning
The following example makes 100k (!) LLM requests. It costs ~$0.85 to complete with OpenAI. It passes "max_output_tokens=20" to save costs, but this keyword argument is specific to OpenAI! Make sure to adjust the kwargs for other providers. Leaving reasoning unconstrained can lead to costs of >$100. Use other providers at your own risk.
examples/stress/stress_test.py
# Stress test: use GPT to count syllables for >100k words
import time
from tqdm import tqdm
import polars as pl
import parallem as pllm
# Read the headerless word list into a single "word" column, then strip
# leading/trailing whitespace from every entry.
# NOTE(review): presumably the file holds one word per line — confirm.
df = pl.read_csv(
    "examples/stress/txts/words_100k.txt",
    has_header=False,
    new_columns=["word"],
)
df = df.with_columns(pl.col("word").str.strip_chars())
def syllable_count_agent(agt: pllm.AgentContext, word: str):
    """Ask the LLM how many syllables ``word`` has and parse the reply.

    The request uses minimal reasoning effort and a 20-token output cap to
    keep the cost of the 100k-request run low; both keyword arguments are
    OpenAI-specific and must be adjusted for other providers.

    Args:
        agt: Agent context used to issue the LLM request.
        word: The word whose syllables should be counted.

    Returns:
        A ``(syllable_count, raw_output)`` pair. When the reply parses as an
        integer the pair is ``(count, None)``; otherwise it is
        ``(None, reply)`` so the unparseable text can be inspected later.
    """
    ct = agt.ask_llm(
        f'How many syllables are in "{word}"? Only return the number, no explanation.',
        reasoning={"effort": "minimal"},
        max_output_tokens=20,
    )
    # Strip once and reuse the result for both the parse and the fallback.
    answer = ct.final_answer.strip()
    try:
        return int(answer), None
    except ValueError:
        # Not a bare integer (e.g. "three" or an empty reply): keep the raw text.
        return None, answer
# Run the agent on all words.
# NOTE(review): "batch_max_size" presumably caps the number of requests per
# submitted batch — confirm against the ParaLLeM documentation.
orch = pllm.resume_directory(
    ".pllm/stress/fresh3/stress_test",
    llm="gpt-5-nano",
    provider="openai",
    strategy="batch",
    tweaks={
        "batch_max_size": 10000,
    },
    load_dotenv=True,
)

# Rows of (word, syllable_count, raw_output), matching the schema used below.
collector = []
for (word,) in tqdm(df.iter_rows(), total=df.height):
    # One uniquely named agent per word.
    with orch.agent(f"syllable_count_{word}") as agt:
        count = syllable_count_agent(agt, word)
        # Kept inside the `with` block so a row is only recorded when the
        # agent call above actually produced a result for this word.
        collector.append((word, *count))

# Time submission.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# wall-clock adjustments while the batch is in flight.
start_time = time.perf_counter()
orch.finalize_and_persist()
end_time = time.perf_counter()
print(f"Took {end_time - start_time:.2f} seconds to submit/retrieve batch.")

# Assemble the collected rows into a typed table: exactly one of
# "syllable_count" / "raw_output" is non-null for each word.
df = pl.DataFrame(
    collector,
    schema={
        "word": pl.Utf8,
        "syllable_count": pl.Int64,
        "raw_output": pl.Utf8,
    },
    orient="row",
)
print(df)