This guide shows the fastest way to extract data from documents using an existing template. Perfect for getting started quickly.
This example assumes you already have a workspace and template set up. For creating templates from scratch, see the Full Workflow guide.
Building with an AI assistant? Use the Integrate Raydocs with AI ✨ guide to provide full documentation context.

Prerequisites

  • A Raydocs API token with sessions-write ability
  • An existing extraction template ID
  • Documents to process (PDF, PNG, JPG)
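Reading the API token from the environment keeps it out of scripts and version control. A minimal sketch (`RAYDOCS_API_TOKEN` is an illustrative variable name, not one the SDK requires):

```python
import os

def load_token(env_var="RAYDOCS_API_TOKEN"):
    """Read the Raydocs API token from the environment, failing loudly if unset."""
    token = os.environ.get(env_var)
    if not token:
        raise RuntimeError(f"Set {env_var} before running the examples below")
    return token
```

With this in place, `RaydocsClient(load_token())` replaces any hardcoded token string.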

The 3-Step Process

1. Upload Files: Upload your documents to temporary storage using signed URLs.
2. Create Sessions: Create extraction sessions with auto_extract: true to start processing automatically.
3. Get Results: Poll for completion and retrieve your structured data.

Complete Example

import time
from raydocs_client import RaydocsClient

# Initialize client
client = RaydocsClient("your_api_token")

# Your template ID (from Raydocs dashboard or API)
TEMPLATE_ID = "550e8400-e29b-41d4-a716-446655440000"

# ─────────────────────────────────────────────────────────────
# Step 1: Upload your documents
# ─────────────────────────────────────────────────────────────
documents = ["invoice1.pdf", "invoice2.pdf", "receipt.pdf"]

file_keys = []
for doc in documents:
    print(f"Uploading {doc}...")
    key = client.upload_file(doc)
    file_keys.append(key)

print(f"✓ Uploaded {len(file_keys)} documents")

# ─────────────────────────────────────────────────────────────
# Step 2: Create sessions with auto-extract enabled
# ─────────────────────────────────────────────────────────────
sessions = client.batch_create_sessions(
    template_id=TEMPLATE_ID,
    file_keys=file_keys,
    auto_extract=True  # Extraction starts automatically!
)

print(f"✓ Created {len(sessions)} extraction sessions")

# ─────────────────────────────────────────────────────────────
# Step 3: Poll for results and retrieve extracted data
# ─────────────────────────────────────────────────────────────
for i, session in enumerate(sessions):
    print(f"\nProcessing {documents[i]}...")
    
    # Poll until extraction completes
    while True:
        results = client.get_results(session['id'])
        
        if results:
            result = results[0]
            if result['status'] == 'completed':
                # Get full result with extracted data
                full_result = client.get_result(result['id'])
                print("✅ Extraction complete!")
                print(f"   Extracted data: {full_result['data']}")
                break
            elif result['status'] == 'failed':
                print(f"❌ Extraction failed for {documents[i]}")
                break
        
        time.sleep(5)  # Wait 5 seconds before polling again
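Note that the loop above polls forever if a session never reaches a terminal state. A hedged variant with a timeout; it takes any zero-argument callable rather than a specific client method, so you would pass e.g. `lambda: client.get_results(session['id'])`:

```python
import time

def wait_for_result(fetch_results, timeout=300, interval=5):
    """Poll until the first result reaches a terminal status or the timeout expires.

    fetch_results: zero-argument callable returning the same list shape as
    client.get_results(session_id).
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        results = fetch_results()
        if results and results[0]["status"] in ("completed", "failed"):
            return results[0]
        time.sleep(interval)
    raise TimeoutError(f"extraction did not finish within {timeout}s")
```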

Understanding the Response

When extraction completes, you get structured data matching your template schema:
{
  "id": "result-uuid-here",
  "status": "completed",
  "data": {
    "invoice_header": {
      "invoice_number": "INV-2024-001",
      "invoice_date": "2024-01-15",
      "total_amount": 1250.00,
      "currency": "USD"
    },
    "vendor_info": {
      "vendor_name": "Acme Corp",
      "vendor_address": "123 Business St, City, ST 12345"
    },
    "line_items": {
      "items": [
        {
          "description": "Consulting Services",
          "quantity": 10,
          "unit_price": 100.00,
          "total": 1000.00
        },
        {
          "description": "Travel Expenses",
          "quantity": 1,
          "unit_price": 250.00,
          "total": 250.00
        }
      ]
    }
  },
  "reasoning": {
    "invoice_header": {
      "invoice_number": {
        "reasoning": "Found 'Invoice #: INV-2024-001' in header section on page 1",
        "confidence": 0.95
      }
    }
  }
}
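Because the data block mirrors your template's sections, it can be traversed like any nested dict. As a quick sanity check, the line-item totals in the sample above can be reconciled against the header total (the field names come from that sample, not from a fixed schema):

```python
# Trimmed copy of the sample response above
result = {
    "data": {
        "invoice_header": {"total_amount": 1250.00, "currency": "USD"},
        "line_items": {
            "items": [
                {"description": "Consulting Services", "total": 1000.00},
                {"description": "Travel Expenses", "total": 250.00},
            ]
        },
    }
}

items = result["data"]["line_items"]["items"]
line_total = sum(item["total"] for item in items)
header_total = result["data"]["invoice_header"]["total_amount"]

if abs(line_total - header_total) > 0.01:
    print(f"Totals disagree: items sum to {line_total}, header says {header_total}")
```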

Error Handling

If you hit rate limits, implement exponential backoff:
import time

import requests

def upload_with_retry(client, file_path, max_retries=3):
    for attempt in range(max_retries):
        try:
            return client.upload_file(file_path)
        except requests.HTTPError as e:
            if e.response.status_code == 429:
                wait = 2 ** attempt * 10  # 10s, 20s, 40s
                print(f"Rate limited, waiting {wait}s...")
                time.sleep(wait)
            else:
                raise
    raise RuntimeError("Max retries exceeded")
Check the result status and handle failures gracefully:
results = client.get_results(session_id)
if results and results[0]['status'] == 'failed':
    # Log error, retry, or notify
    print(f"Extraction failed for session {session_id}")
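At batch scale it helps to partition sessions by outcome before deciding what to retry. A small sketch over `{session_id: latest_result}` pairs, using the status strings shown above:

```python
def partition_by_status(results_by_session):
    """Split session ids into (completed, failed, pending) lists by result status."""
    completed, failed, pending = [], [], []
    for session_id, result in results_by_session.items():
        status = result["status"]
        if status == "completed":
            completed.append(session_id)
        elif status == "failed":
            failed.append(session_id)
        else:
            pending.append(session_id)
    return completed, failed, pending
```

Failed ids can then be re-uploaded or re-created, while pending ones keep polling.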

Next Steps

Full Workflow

Create templates and set up complete extraction pipelines

Extraction Schema

Design powerful extraction schemas