This example assumes you already have a workspace and template set up. For creating templates from scratch, see the Full Workflow guide.
Prerequisites
- A Raydocs API token with the `sessions-write` ability
- An existing extraction template ID
- Documents to process (PDF, PNG, JPG)
The 3-Step Process
1
Upload Files
Upload your documents to temporary storage using signed URLs
2
Create Sessions
Create extraction sessions with `auto_extract: true` to start processing automatically
3
Get Results
Poll for completion and retrieve your structured data
Complete Example
- Python
- TypeScript
- PHP
Copy
import time
from raydocs_client import RaydocsClient
# Initialize client
client = RaydocsClient("your_api_token")

# Your template ID (from Raydocs dashboard or API)
TEMPLATE_ID = "550e8400-e29b-41d4-a716-446655440000"

# ─────────────────────────────────────────────────────────────
# Step 1: Upload your documents
# ─────────────────────────────────────────────────────────────
documents = ["invoice1.pdf", "invoice2.pdf", "receipt.pdf"]

# Each upload returns a key that identifies the uploaded file; the keys are
# handed to the session-creation call in step 2.
file_keys = []
for path in documents:
    print(f"Uploading {path}...")
    file_keys.append(client.upload_file(path))

print(f"✓ Uploaded {len(file_keys)} documents")

# ─────────────────────────────────────────────────────────────
# Step 2: Create sessions with auto-extract enabled
# ─────────────────────────────────────────────────────────────
sessions = client.batch_create_sessions(
    template_id=TEMPLATE_ID,
    file_keys=file_keys,
    auto_extract=True,  # Extraction starts automatically!
)
print(f"✓ Created {len(sessions)} extraction sessions")

# ─────────────────────────────────────────────────────────────
# Step 3: Poll for results and retrieve extracted data
# ─────────────────────────────────────────────────────────────
for index, session in enumerate(sessions):
    print(f"\nProcessing {documents[index]}...")

    # Keep asking for results until the first one reports a terminal status.
    while True:
        results = client.get_results(session['id'])

        if not results:
            # Nothing reported yet; wait 5 seconds before polling again.
            time.sleep(5)
            continue

        latest = results[0]
        status = latest['status']

        if status == 'completed':
            # The listing entry is only used for its id and status here;
            # fetch the full result to read the extracted data.
            full_result = client.get_result(latest['id'])
            print("✅ Extraction complete!")
            print(f"   Extracted data: {full_result['data']}")
            break

        if status == 'failed':
            print("❌ Extraction failed")
            break

        time.sleep(5)  # Still in progress; wait 5 seconds before polling again
Copy
import { RaydocsClient } from './raydocs-client';
/**
 * Runs the three-step flow end to end: uploads each document, creates
 * extraction sessions with auto-extract enabled, then polls every session
 * until its first result is either completed or failed.
 */
async function extractDocuments() {
  const client = new RaydocsClient('your_api_token');

  // Your template ID
  const TEMPLATE_ID = '550e8400-e29b-41d4-a716-446655440000';

  // Resolves after the given number of milliseconds; used between polls.
  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // ─────────────────────────────────────────────────────────────
  // Step 1: Upload your documents
  // ─────────────────────────────────────────────────────────────
  const documents = ['invoice1.pdf', 'invoice2.pdf', 'receipt.pdf'];
  const fileKeys: string[] = [];

  for (const path of documents) {
    console.log(`Uploading ${path}...`);
    fileKeys.push(await client.uploadFile(path));
  }

  console.log(`✓ Uploaded ${fileKeys.length} documents`);

  // ─────────────────────────────────────────────────────────────
  // Step 2: Create sessions with auto-extract enabled
  // ─────────────────────────────────────────────────────────────
  const sessions = await client.batchCreateSessions(
    TEMPLATE_ID,
    fileKeys,
    true // auto_extract
  );

  console.log(`✓ Created ${sessions.length} extraction sessions`);

  // ─────────────────────────────────────────────────────────────
  // Step 3: Poll for results
  // ─────────────────────────────────────────────────────────────
  for (let index = 0; index < sessions.length; index++) {
    console.log(`\nProcessing ${documents[index]}...`);

    // Poll until the first result reports a terminal status
    for (;;) {
      const results = await client.getResults(sessions[index].id);

      if (results.length === 0) {
        // Nothing reported yet; wait 5 seconds
        await pause(5000);
        continue;
      }

      const latest = results[0];

      if (latest.status === 'completed') {
        // Fetch the full result to read the extracted data
        const fullResult = await client.getResult(latest.id);
        console.log('✅ Extraction complete!');
        console.log('   Extracted data:', fullResult.data);
        break;
      }

      if (latest.status === 'failed') {
        console.log('❌ Extraction failed');
        break;
      }

      await pause(5000); // Still in progress; wait 5 seconds
    }
  }
}

extractDocuments();
Copy
<?php
require_once 'vendor/autoload.php';
require_once 'RaydocsClient.php';
// Initialize client
$client = new RaydocsClient('your_api_token');

// Your template ID (from Raydocs dashboard or API)
$templateId = '550e8400-e29b-41d4-a716-446655440000';

// ─────────────────────────────────────────────────────────────
// Step 1: Upload your documents
// ─────────────────────────────────────────────────────────────
$documents = ['invoice1.pdf', 'invoice2.pdf', 'receipt.pdf'];

// Each upload returns a key; the keys are passed to session creation below.
$fileKeys = [];
foreach ($documents as $path) {
    echo "Uploading {$path}...\n";
    $fileKeys[] = $client->uploadFile($path);
}

echo "✓ Uploaded " . count($fileKeys) . " documents\n";

// ─────────────────────────────────────────────────────────────
// Step 2: Create sessions with auto-extract enabled
// ─────────────────────────────────────────────────────────────
$sessions = $client->batchCreateSessions(
    $templateId,
    $fileKeys,
    true // auto_extract - extraction starts automatically!
);

echo "✓ Created " . count($sessions) . " extraction sessions\n";

// ─────────────────────────────────────────────────────────────
// Step 3: Poll for results and retrieve extracted data
// ─────────────────────────────────────────────────────────────
foreach ($sessions as $index => $session) {
    echo "\nProcessing {$documents[$index]}...\n";

    // Poll until the first result reports a terminal status
    for (;;) {
        $results = $client->getResults($session['id']);

        if (empty($results)) {
            // Nothing reported yet; wait 5 seconds before polling again
            sleep(5);
            continue;
        }

        $latest = $results[0];
        $status = $latest['status'];

        if ($status === 'completed') {
            // Fetch the full result to read the extracted data
            $fullResult = $client->getResult($latest['id']);
            echo "✅ Extraction complete!\n";
            echo "   Extracted data: " . json_encode($fullResult['data'], JSON_PRETTY_PRINT) . "\n";
            break;
        }

        if ($status === 'failed') {
            echo "❌ Extraction failed\n";
            break;
        }

        sleep(5); // Still in progress; wait 5 seconds before polling again
    }
}
Understanding the Response
When extraction completes, you get structured data matching your template schema:
Copy
{
"id": "result-uuid-here",
"status": "completed",
"data": {
"invoice_header": {
"invoice_number": "INV-2024-001",
"invoice_date": "2024-01-15",
"total_amount": 1250.00,
"currency": "USD"
},
"vendor_info": {
"vendor_name": "Acme Corp",
"vendor_address": "123 Business St, City, ST 12345"
},
"line_items": {
"items": [
{
"description": "Consulting Services",
"quantity": 10,
"unit_price": 100.00,
"total": 1000.00
},
{
"description": "Travel Expenses",
"quantity": 1,
"unit_price": 250.00,
"total": 250.00
}
]
}
},
"reasoning": {
"invoice_header": {
"invoice_number": {
"reasoning": "Found 'Invoice #: INV-2024-001' in header section on page 1",
"confidence": 0.95
}
}
}
}
Error Handling
Rate Limiting (429)
Rate Limiting (429)
If you hit rate limits, implement exponential backoff:
Copy
import time
def upload_with_retry(client, file_path, max_retries=3, base_delay=10):
    """Upload a file, retrying with exponential backoff on HTTP 429.

    Args:
        client: Object exposing ``upload_file(file_path)``.
        file_path: Path of the document to upload.
        max_retries: Maximum number of upload attempts.
        base_delay: Seconds to wait after the first rate-limited attempt;
            the wait doubles after each further one (10s, 20s, ... by
            default).

    Returns:
        Whatever ``client.upload_file(file_path)`` returns on the first
        successful attempt.

    Raises:
        requests.HTTPError: Re-raised unchanged for any response other
            than 429, or when the error carries no response.
        RuntimeError: When every attempt was rate limited. The last 429
            error is attached as ``__cause__``.
    """
    # Imported here so the snippet is self-contained: the except clause
    # below needs the name, and the snippet's imports only cover ``time``.
    import requests

    last_error = None
    for attempt in range(max_retries):
        try:
            return client.upload_file(file_path)
        except requests.HTTPError as e:
            # Only rate limiting is retried; everything else propagates.
            if e.response is None or e.response.status_code != 429:
                raise
            last_error = e

        # Back off only when another attempt will follow; sleeping after
        # the final attempt would just delay the failure.
        if attempt < max_retries - 1:
            wait = 2 ** attempt * base_delay
            print(f"Rate limited, waiting {wait}s...")
            time.sleep(wait)

    raise RuntimeError("Max retries exceeded") from last_error
Failed Extractions
Failed Extractions
Check the result status and handle failures gracefully:
Copy
# Fetch the session's results and look at the first entry's status.
results = client.get_results(session_id)
extraction_failed = bool(results) and results[0]['status'] == 'failed'

if extraction_failed:
    # Log error, retry, or notify
    print(f"Extraction failed for session {session_id}")
