Build an AI Research Agent
Build an agent that searches the web, scrapes the top results, extracts structured data, and produces a research summary — all through the AlterLab API.
Time Estimate
What You'll Build
An AI research agent that follows four steps:
Search
Find relevant pages on the web using the AlterLab Search API.
Scrape
Scrape each result page to get the full content.
Extract
Pull structured data (key facts, dates, authors) from each page using a JSON schema.
Summarize
Feed the extracted data to an LLM to produce a research summary with citations.
Prerequisites
- An AlterLab API key with credits
- An OpenAI API key (for the LLM summarization step — you can substitute any LLM provider)
- Python 3.10+ or Node.js 18+
pip install requests openai

Step 1: Search for Sources
Start by searching for pages relevant to your research topic. We use time_range: "month" to prioritize recent content and scrape_results: true to fetch full text in one call.
import requests
import time
ALTERLAB_KEY = "YOUR_ALTERLAB_API_KEY"
BASE = "https://api.alterlab.io/api/v1"
def search_and_scrape(topic: str, num_sources: int = 5, max_polls: int = 60) -> dict:
    """Search for a topic and scrape the results.

    Runs an AlterLab search with scraping and structured extraction
    enabled. When the API answers 202 (async search), polls the status
    endpoint until the search completes.

    Args:
        topic: Search query string.
        num_sources: Number of results to request.
        max_polls: Upper bound on status polls (2 s apart) so a stuck
            or failed search cannot hang the caller forever.

    Returns:
        The completed search payload as a dict.

    Raises:
        requests.HTTPError: If the search request itself fails.
        RuntimeError: If the async search reports a failed state.
        TimeoutError: If the search does not complete within max_polls.
    """
    response = requests.post(
        f"{BASE}/search",
        headers={"X-API-Key": ALTERLAB_KEY},
        json={
            "query": topic,
            "num_results": num_sources,
            "time_range": "month",
            "scrape_results": True,
            "formats": ["text"],
            "extraction_schema": {
                "type": "object",
                "properties": {
                    "title": {
                        "type": "string",
                        "description": "Article or page title"
                    },
                    "author": {
                        "type": "string",
                        "description": "Author name if available"
                    },
                    "date_published": {
                        "type": "string",
                        "description": "Publication date in YYYY-MM-DD format"
                    },
                    "key_findings": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "3-5 main findings, facts, or claims"
                    },
                    "methodology": {
                        "type": "string",
                        "description": "Research methodology if mentioned"
                    }
                },
                "required": ["title", "key_findings"]
            }
        }
    )
    # Fail loudly on HTTP errors (401, 429, 5xx) instead of mis-parsing them.
    response.raise_for_status()
    data = response.json()
    # Poll if async (> 5 results): 202 means the search is still running.
    if response.status_code == 202:
        search_id = data["search_id"]
        for _ in range(max_polls):
            status = requests.get(
                f"{BASE}/search/{search_id}",
                headers={"X-API-Key": ALTERLAB_KEY}
            ).json()
            if status.get("status") == "completed":
                return status
            # assumes the API reports a terminal "failed" state — TODO confirm
            if status.get("status") == "failed":
                raise RuntimeError(f"Search {search_id} failed")
            time.sleep(2)
        raise TimeoutError(f"Search {search_id} did not complete after {max_polls} polls")
    return data
# Run it
results = search_and_scrape("large language model efficiency techniques 2026")
print(f"Found {results['results_count']} sources")

Step 2: Collect Extracted Data
The search + scrape call already extracted structured data. Now collect it into a clean list of sources:
def collect_sources(search_results: dict) -> list[dict]:
    """Flatten search results into a list of per-source records.

    Every record carries the page URL plus whatever structured fields the
    extraction step produced, with sensible fallbacks for missing data.
    """
    def _to_source(result: dict) -> dict:
        # Both "content" and "extraction" may be absent or None.
        page = result.get("content") or {}
        fields = page.get("extraction") or {}
        return {
            "url": result["url"],
            "title": fields.get("title", result["title"]),
            "author": fields.get("author", "Unknown"),
            "date": fields.get("date_published"),
            "findings": fields.get("key_findings", []),
            "methodology": fields.get("methodology"),
            "text_preview": (page.get("text") or "")[:500],
        }

    return [_to_source(result) for result in search_results.get("results", [])]
sources = collect_sources(results)
# Preview what we collected
for i, s in enumerate(sources, 1):
print(f"\n[{i}] {s['title']}")
print(f" URL: {s['url']}")
print(f" Author: {s['author']} | Date: {s['date']}")
print(f" Findings: {len(s['findings'])} key points")
for f in s["findings"]:
print(f" - {f}")

Step 3: Summarize with an LLM
Feed the extracted data to an LLM to produce a research summary with proper citations. We use OpenAI here, but any LLM works.
from openai import OpenAI
openai_client = OpenAI(api_key="YOUR_OPENAI_API_KEY")
def summarize_research(topic: str, sources: list[dict]) -> str:
    """Generate a research summary from collected sources.

    Renders every source as a numbered [Source N] block, then asks the
    LLM to synthesize them into a cited summary.
    """
    def _render(index: int, src: dict) -> str:
        # One bullet per extracted finding.
        bullets = "\n".join(f" - {item}" for item in src["findings"])
        return (
            f"[Source {index}] {src['title']}\n"
            f"URL: {src['url']}\n"
            f"Author: {src['author']} | Date: {src['date']}\n"
            f"Key findings:\n{bullets}\n"
            f"Text preview: {src['text_preview']}"
        )

    context = "\n\n---\n\n".join(
        _render(index, src) for index, src in enumerate(sources, 1)
    )

    system_prompt = (
        "You are a research analyst. Synthesize the provided "
        "sources into a clear, well-structured research summary. "
        "Cite sources using [Source N] notation. Highlight areas "
        "of agreement and disagreement between sources."
    )
    user_prompt = (
        f"Research topic: {topic}\n\n"
        f"Sources:\n\n{context}\n\n"
        "Write a research summary (300-500 words) with:\n"
        "1. Executive summary (2-3 sentences)\n"
        "2. Key findings (organized by theme)\n"
        "3. Areas of disagreement\n"
        "4. Conclusion and recommended next steps"
    )

    reply = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.3,
    )
    return reply.choices[0].message.content
summary = summarize_research(
"large language model efficiency techniques 2026",
sources
)
print(summary)

Full Agent Code
Here is the complete research agent combining all three steps:
"""AI Research Agent — Search, scrape, extract, and summarize."""
import requests
import time
from openai import OpenAI
# Configuration
ALTERLAB_KEY = "YOUR_ALTERLAB_API_KEY"
OPENAI_KEY = "YOUR_OPENAI_API_KEY"
BASE = "https://api.alterlab.io/api/v1"
openai_client = OpenAI(api_key=OPENAI_KEY)
# JSON schema handed to the AlterLab extraction step: the structured
# fields to pull out of every scraped page.  Only "title" and
# "key_findings" are required; author/date/methodology are best-effort.
EXTRACTION_SCHEMA = {
    "type": "object",
    "properties": {
        "title": {"type": "string", "description": "Article title"},
        "author": {"type": "string", "description": "Author name"},
        "date_published": {"type": "string", "description": "Date in YYYY-MM-DD"},
        "key_findings": {
            "type": "array",
            "items": {"type": "string"},
            "description": "3-5 main findings or claims"
        },
        "methodology": {"type": "string", "description": "Research method if mentioned"}
    },
    "required": ["title", "key_findings"]
}
def _run_search(topic: str, num_sources: int, max_polls: int) -> dict:
    """Step 1: submit the search request and wait for scraping to finish.

    Raises:
        requests.HTTPError: on a non-2xx search response.
        RuntimeError: if the async search reports a failed state.
        TimeoutError: if polling exceeds max_polls attempts (2 s apart).
    """
    response = requests.post(
        f"{BASE}/search",
        headers={"X-API-Key": ALTERLAB_KEY},
        json={
            "query": topic,
            "num_results": num_sources,
            "time_range": "month",
            "scrape_results": True,
            "formats": ["text"],
            "extraction_schema": EXTRACTION_SCHEMA
        }
    )
    # Surface HTTP errors (401, 429, 5xx) instead of mis-parsing them.
    response.raise_for_status()
    data = response.json()
    if response.status_code != 202:
        return data

    # HTTP 202 = the search runs asynchronously; poll until it completes.
    search_id = data["search_id"]
    for _ in range(max_polls):
        status = requests.get(
            f"{BASE}/search/{search_id}",
            headers={"X-API-Key": ALTERLAB_KEY}
        ).json()
        # Progress keys may be absent early in the search lifecycle.
        print(f" Scraping: {status.get('completed', 0)}/{status.get('results_count', '?')} done")
        if status.get("status") == "completed":
            return status
        # assumes the API reports a terminal "failed" state — TODO confirm
        if status.get("status") == "failed":
            raise RuntimeError(f"Search {search_id} failed")
        time.sleep(2)
    raise TimeoutError(f"Search {search_id} did not complete after {max_polls} polls")

def _collect_sources(data: dict) -> list[dict]:
    """Step 2: flatten the raw search payload into per-source records."""
    sources = []
    for result in data.get("results", []):
        content = result.get("content") or {}
        ext = content.get("extraction") or {}
        sources.append({
            "url": result["url"],
            "title": ext.get("title", result["title"]),
            "author": ext.get("author", "Unknown"),
            "date": ext.get("date_published"),
            "findings": ext.get("key_findings", []),
            "methodology": ext.get("methodology"),
            # Cap the excerpt so the LLM prompt stays small.
            "text": (content.get("text") or "")[:1000],
        })
    return sources

def _summarize(topic: str, sources: list[dict]) -> str:
    """Step 3: ask the LLM for a cited synthesis of the collected sources."""
    source_texts = []
    for i, s in enumerate(sources, 1):
        findings = "\n".join(f" - {f}" for f in s["findings"])
        source_texts.append(
            f"[Source {i}] {s['title']}\n"
            f"URL: {s['url']}\n"
            f"Author: {s['author']} | Date: {s['date']}\n"
            f"Findings:\n{findings}\n"
            f"Text: {s['text']}"
        )
    context = "\n\n---\n\n".join(source_texts)
    return openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a research analyst. Synthesize sources into a "
                    "structured summary. Cite with [Source N]. Highlight "
                    "agreements and disagreements."
                )
            },
            {
                "role": "user",
                "content": (
                    f"Topic: {topic}\n\n{context}\n\n"
                    "Write a 300-500 word summary with:\n"
                    "1. Executive summary\n"
                    "2. Key findings by theme\n"
                    "3. Disagreements\n"
                    "4. Conclusion"
                )
            }
        ],
        temperature=0.3,
    ).choices[0].message.content

def research(topic: str, num_sources: int = 5, max_polls: int = 60) -> str:
    """Run the full research pipeline: search -> extract -> summarize.

    Args:
        topic: Research question / search query.
        num_sources: Number of web sources to gather.
        max_polls: Upper bound on async-search status polls (2 s apart).

    Returns:
        The LLM summary followed by a numbered source reference list.

    Raises:
        requests.HTTPError, RuntimeError, TimeoutError: see _run_search.
    """
    print(f"Researching: {topic}")
    print(f"Searching for {num_sources} sources...\n")
    data = _run_search(topic, num_sources, max_polls)

    sources = _collect_sources(data)
    print(f"\nCollected {len(sources)} sources:")
    for i, s in enumerate(sources, 1):
        print(f" [{i}] {s['title']}")

    print("\nGenerating summary...\n")
    summary = _summarize(topic, sources)

    # Append a plain reference list so URLs survive outside the LLM output.
    refs = "\n".join(
        f"[{i}] {s['title']} — {s['url']}"
        for i, s in enumerate(sources, 1)
    )
    return f"{summary}\n\n---\nSources:\n{refs}"
if __name__ == "__main__":
report = research("large language model efficiency techniques 2026")
print("=" * 60)
print(report)

LangChain Integration
Wrap the AlterLab search as a LangChain tool for use in agent chains:
pip install langchain langchain-openai

from langchain.tools import tool
from langchain_openai import ChatOpenAI
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.prompts import ChatPromptTemplate
import requests
import time
ALTERLAB_KEY = "YOUR_ALTERLAB_API_KEY"
BASE = "https://api.alterlab.io/api/v1"
@tool
def web_search(query: str) -> str:
    """Search the web and return relevant page content. Use this to find
    information about any topic. Returns titles, URLs, and key findings."""
    # The docstring above doubles as the tool description shown to the
    # LLM, so it is kept verbatim.
    payload = {
        "query": query,
        "num_results": 5,
        "time_range": "month",
        "scrape_results": True,
        "formats": ["text"],
        "extraction_schema": {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "key_findings": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "3-5 key facts"
                }
            },
            "required": ["title", "key_findings"]
        }
    }
    response = requests.post(
        f"{BASE}/search",
        headers={"X-API-Key": ALTERLAB_KEY},
        json=payload,
    )
    data = response.json()

    # Poll if async: HTTP 202 means the search is still running.
    # Bounded at 30 attempts x 2 s so the tool cannot hang the agent.
    if response.status_code == 202:
        search_id = data["search_id"]
        for _ in range(30):
            status = requests.get(
                f"{BASE}/search/{search_id}",
                headers={"X-API-Key": ALTERLAB_KEY}
            ).json()
            if status["status"] == "completed":
                data = status
                break
            time.sleep(2)

    # Render one bullet per result for the agent to read.
    lines = []
    for hit in data.get("results", []):
        extracted = (hit.get("content") or {}).get("extraction") or {}
        facts = extracted.get("key_findings", [])
        summary = "; ".join(facts) if facts else "No findings extracted"
        lines.append(f"- {hit['title']} ({hit['url']}): {summary}")
    return "\n".join(lines) if lines else "No results found."
@tool
def scrape_page(url: str) -> str:
    """Scrape a specific URL and return its text content."""
    resp = requests.post(
        f"{BASE}/scrape",
        headers={"X-API-Key": ALTERLAB_KEY},
        json={"url": url, "formats": ["text"]},
    )
    body = resp.json()
    text = body.get("text") or ""
    # Truncate so a single page cannot blow the agent's context window.
    return text[:3000]
# Create the agent: a tool-calling loop where the model decides when to
# invoke web_search / scrape_page.
llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
tools = [web_search, scrape_page]
prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a research assistant. Use the web_search tool to find "
     "information, and scrape_page for deeper reads. Always cite sources."),
    ("human", "{input}"),
    # Slot where the agent's intermediate tool calls/results are injected.
    ("placeholder", "{agent_scratchpad}"),
])
agent = create_tool_calling_agent(llm, tools, prompt)
# verbose=True prints each tool call and observation while the agent runs.
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
# Run the agent
result = executor.invoke({
"input": "Research the latest advances in quantum error correction. "
"Find at least 3 sources and summarize the key breakthroughs."
})
print(result["output"])

Node.js Version
Here is the same research agent in TypeScript:
import OpenAI from "openai";
const ALTERLAB_KEY = "YOUR_ALTERLAB_API_KEY";
const BASE = "https://api.alterlab.io/api/v1";
const openai = new OpenAI({ apiKey: "YOUR_OPENAI_API_KEY" });
// Normalized record for one scraped research source.
interface Source {
  url: string;
  title: string; // extracted title, falling back to the search-result title
  author: string; // "Unknown" when extraction found no author
  date: string | null; // date_published from extraction, if any
  findings: string[]; // key_findings from the extraction schema
  text: string; // first 1000 chars of the scraped page text
}
// Full research pipeline: search -> scrape/extract -> LLM summary.
// Returns the summary text followed by a numbered reference list.
async function research(topic: string, numSources = 5): Promise<string> {
  console.log(`Researching: ${topic}`);
  // Step 1: Search + Scrape + Extract — one call does all three because
  // scrape_results and extraction_schema are set on the search request.
  const searchRes = await fetch(`${BASE}/search`, {
    method: "POST",
    headers: {
      "X-API-Key": ALTERLAB_KEY,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      query: topic,
      num_results: numSources,
      time_range: "month",
      scrape_results: true,
      formats: ["text"],
      extraction_schema: {
        type: "object",
        properties: {
          title: { type: "string" },
          author: { type: "string" },
          date_published: { type: "string" },
          key_findings: {
            type: "array",
            items: { type: "string" },
            description: "3-5 key findings",
          },
        },
        required: ["title", "key_findings"],
      },
    }),
  });
  let data = await searchRes.json();
  // Poll if async: HTTP 202 means the search is still running server-side.
  // NOTE(review): this loop has no upper bound — a search that never
  // reaches "completed" would spin forever; consider a max-attempt cap.
  if (searchRes.status === 202) {
    const searchId = data.search_id;
    while (true) {
      await new Promise((r) => setTimeout(r, 2000));
      const status = await fetch(`${BASE}/search/${searchId}`, {
        headers: { "X-API-Key": ALTERLAB_KEY },
      }).then((r) => r.json());
      console.log(` Scraping: ${status.completed}/${status.results_count}`);
      if (status.status === "completed") {
        data = status;
        break;
      }
    }
  }
  // Step 2: Collect sources — flatten each result into a Source record,
  // with fallbacks when extraction fields are missing.
  const sources: Source[] = data.results.map((r: any) => {
    const ext = r.content?.extraction ?? {};
    return {
      url: r.url,
      title: ext.title ?? r.title,
      author: ext.author ?? "Unknown",
      date: ext.date_published ?? null,
      findings: ext.key_findings ?? [],
      text: (r.content?.text ?? "").slice(0, 1000), // cap prompt size
    };
  });
  console.log(`\nCollected ${sources.length} sources`);
  // Step 3: Summarize with LLM — one "[Source N]" block per source so the
  // model can cite by number.
  const context = sources
    .map(
      (s, i) =>
        `[Source ${i + 1}] ${s.title}\n` +
        `URL: ${s.url}\n` +
        `Findings:\n${s.findings.map((f) => ` - ${f}`).join("\n")}\n` +
        `Text: ${s.text}`
    )
    .join("\n\n---\n\n");
  const completion = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [
      {
        role: "system",
        content:
          "Synthesize sources into a structured summary. Cite with [Source N].",
      },
      {
        role: "user",
        content: `Topic: ${topic}\n\n${context}\n\nWrite a 300-500 word summary.`,
      },
    ],
    temperature: 0.3,
  });
  const summary = completion.choices[0].message.content;
  // Append a plain reference list so URLs survive outside the LLM output.
  const refs = sources
    .map((s, i) => `[${i + 1}] ${s.title} — ${s.url}`)
    .join("\n");
  return `${summary}\n\n---\nSources:\n${refs}`;
}
// Run it
const report = await research("large language model efficiency techniques 2026");
console.log(report);Next Steps
Add Multi-Query Research
Run multiple searches with different queries and combine the results for more comprehensive research. Use different time_range values for historical perspective.
Add Domain-Specific Search
Use the domain parameter to search specific sites (e.g., arxiv.org for papers, news.ycombinator.com for tech discussions).
Use Batch for Scale
For large-scale research, use Batch Scraping to process many URLs in parallel after the initial search.
Explore the Search Guide
See the Search Guide for more patterns: geo-targeted search, time-filtered discovery, and competitive analysis.