# Async API
unifex provides native async/await support for integration with async applications.
## Basic Async Extraction
import asyncio
from unifex import create_extractor, ExtractorType
async def extract_document():
    """Extract ``document.pdf`` with the PDF extractor and return its document.

    The extractor is used as a context manager and ``extract_async`` is
    awaited with ``max_workers=2``; the ``document`` attribute of the
    awaited result is returned.
    """
    with create_extractor("document.pdf", ExtractorType.PDF) as pdf_extractor:
        extraction = await pdf_extractor.extract_async(max_workers=2)
        return extraction.document


# asyncio.run drives the coroutine to completion from synchronous code.
doc = asyncio.run(extract_document())
print(f"Extracted {len(doc.pages)} pages asynchronously")
## Async LLM Extraction
import asyncio
from unifex.llm import extract_structured_async
async def extract():
    """Await structured extraction of ``document.pdf`` and return its data.

    Calls ``extract_structured_async`` with the ``openai/gpt-4o`` model and
    ``max_workers=4``, then returns the ``data`` attribute of the result.
    """
    structured = await extract_structured_async(
        "document.pdf", model="openai/gpt-4o", max_workers=4
    )
    return structured.data


# Run the coroutine from synchronous code and keep the extracted data.
data = asyncio.run(extract())
## Using with FastAPI
from fastapi import FastAPI, UploadFile
from unifex import create_extractor, ExtractorType
app = FastAPI()
@app.post("/extract")
async def extract_document(file: UploadFile):
    """Extract an uploaded PDF and report how many pages it contains.

    The upload is written to a uniquely named temporary file, processed
    with the PDF extractor, and the temporary file is removed afterwards
    whether or not extraction succeeds.

    Args:
        file: The uploaded file from the multipart request body.

    Returns:
        A dict of the form ``{"pages": <number of pages>}``.
    """
    import os
    import tempfile

    content = await file.read()

    # Never build the path from the client-supplied filename: it may be
    # None, may contain "../" segments that escape the temp directory, and
    # identical names from concurrent requests would overwrite each other.
    # delete=False lets the extractor reopen the file by path after this
    # handle is closed; cleanup is done explicitly below.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(content)
        temp_path = tmp.name

    try:
        # Extract asynchronously
        with create_extractor(temp_path, ExtractorType.PDF) as extractor:
            result = await extractor.extract_async()
            return {"pages": len(result.document.pages)}
    finally:
        # Remove the temporary file on success and on failure alike.
        os.remove(temp_path)
## Concurrent Document Processing
Process multiple documents concurrently:
import asyncio
from unifex import create_extractor, ExtractorType
async def extract_one(path: str):
    """Open *path* with the PDF extractor and return the awaited extraction result."""
    with create_extractor(path, ExtractorType.PDF) as pdf_extractor:
        outcome = await pdf_extractor.extract_async()
        return outcome
async def extract_many(paths: list[str]):
    """Run ``extract_one`` for every path concurrently.

    Returns the list produced by ``asyncio.gather``, with one result per
    input path, in the same order as ``paths``.
    """
    return await asyncio.gather(*(extract_one(p) for p in paths))


# Using the same file twice for demo
paths = ["document.pdf", "document.pdf"]
results = asyncio.run(extract_many(paths))
print(f"Extracted {len(results)} documents concurrently")