# AlterLab Python SDK

The official Python SDK for AlterLab. Simple, type-safe, and async-ready.

[PyPI version] [Python versions] [GitHub stars] [License]
Zero Dependencies
Full Type Hints
Async Support
Python 3.8+

Installation

pip install alterlab
poetry add alterlab
pipenv install alterlab

Quick Start

from alterlab import AlterLab

# Initialize the client
client = AlterLab(api_key="sk_live_...")  # or set ALTERLAB_API_KEY env var

# Scrape a webpage
result = client.scrape("https://example.com")

# Access the content
print(result.text)          # Extracted text content
print(result.html)          # Raw HTML
print(result.json)          # Structured JSON (Schema.org, metadata)
print(result.status_code)   # HTTP status code

# Access billing info
print(result.billing.cost_dollars)  # Cost in USD
print(result.billing.tier_used)     # Which tier was used

Environment Variable

You can set the ALTERLAB_API_KEY environment variable instead of passing the key directly.

Client Options

from alterlab import AlterLab

# Basic initialization
client = AlterLab(api_key="sk_live_...")

# With all options
client = AlterLab(
    api_key="sk_live_...",
    base_url="https://alterlab.io",  # Custom endpoint (optional)
    timeout=120,                      # Request timeout in seconds
    max_retries=3,                    # Auto-retry on transient failures
    retry_delay=1.0                   # Initial retry delay (exponential backoff)
)

# From environment variable
import os
os.environ["ALTERLAB_API_KEY"] = "sk_live_..."
client = AlterLab()  # Reads from ALTERLAB_API_KEY
| Option | Type | Default | Description |
| --- | --- | --- | --- |
| api_key | str | env var | Your AlterLab API key |
| base_url | str | https://alterlab.io | API base URL |
| timeout | int | 120 | Request timeout in seconds |
| max_retries | int | 3 | Max retries on transient failures |
| retry_delay | float | 1.0 | Initial retry delay in seconds |

Scraping Methods

client.scrape(url, **options)

Main scraping method with intelligent tier escalation.

# Auto mode - intelligent tier escalation
result = client.scrape("https://example.com")
print(result.text)          # Extracted text
print(result.json)          # Structured JSON data
print(result.billing.cost_dollars)  # Cost in USD

client.scrape_html(url)

Fast HTML-only scraping. Best for static sites.

# Force HTML-only mode (fastest, cheapest)
result = client.scrape_html("https://example.com")
print(result.html)  # Raw HTML content

client.scrape_js(url, **options)

JavaScript rendering for SPAs and dynamic content.

# Full JavaScript rendering
result = client.scrape_js(
    "https://spa-app.com",
    screenshot=True,        # Capture screenshot
    wait_for="#content"     # Wait for selector
)
print(result.screenshot_url)  # Screenshot URL

client.scrape_pdf(url, format="text")

Extract text from PDF documents.

result = client.scrape_pdf(
    "https://example.com/document.pdf",
    format="markdown"  # "text" or "markdown"
)
print(result.text)

client.scrape_ocr(url, language="eng")

Extract text from images using OCR.

result = client.scrape_ocr(
    "https://example.com/image.png",
    language="eng"  # eng, fra, deu, jpn, etc.
)
print(result.text)

Structured Extraction

Extract structured data using JSON Schema, natural language prompts, or pre-built profiles.

JSON Schema Extraction

result = client.scrape(
    "https://store.com/product/123",
    extraction_schema={
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "price": {"type": "number"},
            "in_stock": {"type": "boolean"}
        }
    }
)
print(result.json)  # {"name": "...", "price": 29.99, "in_stock": true}

Pre-built Profiles

# Use a pre-built extraction profile
result = client.scrape(
    "https://store.com/product/123",
    extraction_profile="product"  # product, article, job_posting, etc.
)
print(result.json)

Natural Language Prompt

result = client.scrape(
    "https://news.com/article",
    extraction_prompt="Extract the article title, author, and publish date"
)
print(result.json)

Cost Controls

Control costs by limiting tiers, setting budgets, or optimizing for cost vs speed.

from alterlab import AlterLab, CostControls

client = AlterLab(api_key="sk_live_...")

# Limit to cheap tiers only
result = client.scrape(
    "https://example.com",
    cost_controls=CostControls(
        max_tier="2",       # Don't go above HTTP tier
        prefer_cost=True,   # Optimize for lowest cost
        fail_fast=True      # Error instead of escalating
    )
)

# Estimate cost before scraping
estimate = client.estimate_cost("https://linkedin.com")
print(f"Estimated: ${estimate.estimated_cost_dollars:.4f}")
print(f"Confidence: {estimate.confidence}")

Pricing Tiers

| Tier | Name | Price | Per $1 | Use Case |
| --- | --- | --- | --- | --- |
| 1 | Curl | $0.0002 | 5,000 | Static HTML sites |
| 2 | HTTP | $0.0003 | 3,333 | TLS fingerprinting |
| 3 | Stealth | $0.002 | 500 | Browser checks |
| 4 | Browser | $0.004 | 250 | JS-heavy SPAs |
| 5 | Captcha | $0.02 | 50 | CAPTCHA solving |

Async Support

Use the async client for concurrent scraping with native asyncio support:

import asyncio
from alterlab import AsyncAlterLab

async def main():
    async with AsyncAlterLab(api_key="sk_live_...") as client:
        # Single request
        result = await client.scrape("https://example.com")
        print(result.text)

        # Concurrent requests (parallel scraping)
        urls = [
            "https://example.com/page1",
            "https://example.com/page2",
            "https://example.com/page3",
        ]

        results = await asyncio.gather(*[client.scrape(url) for url in urls])

        for r in results:
            print(r.title, r.billing.cost_dollars)

asyncio.run(main())

BYOP (Bring Your Own Proxy)

Get 20% discount when using your own proxy. Configure your proxy integration in the dashboard first.

from alterlab import AlterLab, AdvancedOptions

client = AlterLab(api_key="sk_live_...")

# Use your configured proxy integration
result = client.scrape(
    "https://example.com",
    advanced=AdvancedOptions(
        use_own_proxy=True,
        proxy_country="US"  # Optional: request specific geo
    )
)

# Check if BYOP was applied
if result.billing.byop_applied:
    print(f"Saved {result.billing.byop_discount_percent}%!")

20% Discount

When BYOP is successfully applied, you receive a 20% discount on all tier costs.

Error Handling

from alterlab import (
    AlterLab,
    AuthenticationError,
    InsufficientCreditsError,
    RateLimitError,
    ScrapeError,
    TimeoutError
)

client = AlterLab(api_key="sk_live_...")

try:
    result = client.scrape("https://example.com")
    print(result.text)

except AuthenticationError:
    print("Invalid API key")

except InsufficientCreditsError:
    print("Please top up your balance")

except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after}s")

except ScrapeError as e:
    print(f"Scraping failed: {e.message}")

except TimeoutError:
    print("Request timed out")
| Exception | HTTP Code | Description |
| --- | --- | --- |
| AuthenticationError | 401 | Invalid or missing API key |
| InsufficientCreditsError | 402 | Insufficient balance |
| RateLimitError | 429 | Too many requests |
| ScrapeError | Various | Scraping failed |
| TimeoutError | 408 | Request timed out |

API Reference

ScrapeResult Object

result.url              # Scraped URL
result.status_code      # HTTP status
result.text             # Extracted text content
result.html             # HTML content
result.json             # Structured JSON content
result.title            # Page title
result.author           # Author (if detected)
result.billing          # BillingDetails object
result.billing.tier_used       # Tier that succeeded
result.billing.cost_dollars    # Final cost in USD
result.screenshot_url   # Screenshot URL (if requested)
result.pdf_url          # PDF URL (if requested)
result.cached           # Whether result was from cache

Check Usage & Balance

usage = client.get_usage()
print(f"Balance: ${usage.balance_dollars:.2f}")
print(f"Used this month: {usage.credits_used_month} credits")

Full Documentation

For complete API reference including all parameters and return types, see the GitHub repository or use your IDE's autocomplete with the full type hints.