All tutorials

Build a Document Assistant

Create an assistant that understands, summarizes, and answers questions about documents.


Overview

You'll build a document assistant that:

  • Accepts document text input
  • Stores structured document memory
  • Retrieves relevant sections by query
  • Summarizes documents on demand
  • Persists documents across sessions

Expected outcome: A deployed assistant that can summarize documents supplied as text (for example, text extracted from PDFs) and answer questions from stored documents.


Architecture

Document
  ↓
Document Assistant Agent
  ↓
Memory (structured sections, metadata, embeddings)
  ↓
Response (summary, answer, extracted data)

Prerequisites


Step 1: Create Document Agent

novium agent create doc-assistant

Step 2: Implement Logic

Open agents/doc-assistant/agent.ts:

/** One searchable piece of an uploaded document, as produced by `handleUpload`. */
interface DocumentSection {
  /** Section identifier; `handleUpload` builds it as `<documentId>_sec_<index>` (0-based index). */
  id: string;
  /** Display title; `handleUpload` generates "Section N" (1-based). */
  title: string;
  /** Section text; `handleUpload` stores at most 1000 characters per section. */
  content: string;
}

/** A full document record as saved in memory under its own `id` as the key. */
interface StoredDocument {
  /** Document identifier; `handleUpload` builds it as `doc_<Date.now()>`. */
  id: string;
  /** Name supplied at upload time (the agent falls back to "untitled" when none is given). */
  name: string;
  /** The document's sections, in the order they appeared in the uploaded text. */
  sections: DocumentSection[];
  /** Upload time as an ISO 8601 string (from `Date.prototype.toISOString`). */
  uploadedAt: string;
}

/**
 * Entry point of the document assistant agent.
 *
 * Routes the request to the handler that matches `input.action`. Optional
 * fields that are absent are replaced by defaults before being handed on:
 * `documentName` becomes "untitled"; `content`, `query`, and `documentId`
 * become the empty string.
 *
 * @param input - Request payload; `userId` and `action` are always required.
 * @returns The chosen handler's result, or `{ error }` for an unrecognized action.
 */
export default async function agent(input: {
  userId: string;
  action: "upload" | "search" | "summarize" | "list";
  documentName?: string;
  content?: string;
  query?: string;
  documentId?: string;
}) {
  const { userId, action } = input;

  if (action === "upload") {
    const name = input.documentName ?? "untitled";
    const text = input.content ?? "";
    return handleUpload(userId, name, text);
  }

  if (action === "search") {
    return handleSearch(userId, input.query ?? "");
  }

  if (action === "summarize") {
    return handleSummarize(userId, input.documentId ?? "");
  }

  if (action === "list") {
    return handleList(userId);
  }

  // Only reachable when a runtime payload falls outside the declared action union.
  return { error: `Unknown action: ${action}` };
}

/**
 * Stores a new document for a user and splits it into searchable sections.
 *
 * The text is split into paragraphs on blank lines (LF or CRLF line endings;
 * a "blank" line may contain whitespace). Each paragraph becomes one section,
 * and a paragraph longer than 1000 characters is cut into consecutive sections
 * of at most 1000 characters so that no text is dropped.
 *
 * Writes to memory:
 *  - the full document under its id,
 *  - every section under `<docId>:section:<sectionId>`,
 *  - an `{ id, name, uploadedAt }` entry appended to `user:<userId>:docs`.
 *
 * @param userId - Owner of the document; selects the per-user index key.
 * @param name - Display name of the document.
 * @param content - Raw document text.
 * @returns Upload details (`documentId`, `sectionCount`, `preview`, ...), or
 *   `{ error }` when `content` holds no non-whitespace text.
 */
async function handleUpload(userId: string, name: string, content: string) {
  const maxSectionChars = 1000;
  const previewChars = 200;

  // Split on blank lines; `\r?` and `\s*` make CRLF files and whitespace-only
  // separator lines behave the same as a plain "\n\n".
  const paragraphs = content
    .split(/\r?\n\s*\n/)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0);

  // Nothing to index: report it instead of storing an empty document.
  if (paragraphs.length === 0) {
    return { error: "Document content is empty" };
  }

  // Ids are millisecond timestamps, so two uploads in the same millisecond
  // would share an id.
  const docId = `doc_${Date.now()}`;

  // Chunk over-long paragraphs rather than truncating them, so every
  // character of the upload stays searchable.
  const sections: DocumentSection[] = paragraphs
    .flatMap((paragraph) => {
      const chunks: string[] = [];
      for (let start = 0; start < paragraph.length; start += maxSectionChars) {
        chunks.push(paragraph.substring(start, start + maxSectionChars));
      }
      return chunks;
    })
    .map((text, i) => ({
      id: `${docId}_sec_${i}`,
      title: `Section ${i + 1}`,
      content: text,
    }));

  const document: StoredDocument = {
    id: docId,
    name,
    sections,
    uploadedAt: new Date().toISOString(),
  };

  // Store the full document (metadata plus all sections) under its id.
  await memory.save({ key: docId, value: document });

  // Store each section under its own key; the writes are independent, so
  // issue them together.
  await Promise.all(
    sections.map((section) =>
      memory.save({
        key: `${docId}:section:${section.id}`,
        value: section,
      }),
    ),
  );

  // Append to the user's document index, tolerating a missing or malformed value.
  const indexKey = `user:${userId}:docs`;
  const existingIndex = await memory.get(indexKey);
  const userDocs = Array.isArray(existingIndex) ? [...existingIndex] : [];
  userDocs.push({ id: docId, name, uploadedAt: document.uploadedAt });
  await memory.save({ key: indexKey, value: userDocs });

  return {
    action: "uploaded",
    documentId: docId,
    name,
    sectionCount: sections.length,
    // Only mark the preview as truncated when text was actually cut off.
    preview:
      content.length > previewChars
        ? content.substring(0, previewChars) + "..."
        : content,
  };
}

/**
 * Finds sections in the user's documents whose text contains the query.
 *
 * Matching is a case-insensitive substring test on the trimmed query. A query
 * that is empty or only whitespace yields no results (an empty string would
 * otherwise match every section). Index entries whose document is no longer
 * stored are skipped.
 *
 * @param userId - Owner whose document index (`user:<userId>:docs`) is searched.
 * @param query - Text to look for.
 * @returns The query as given, up to five matching sections, and the total
 *   number of matches found.
 */
async function handleSearch(userId: string, query: string) {
  const results: Array<{ documentId: string; documentName: string; section: DocumentSection; score: number }> = [];

  // Normalize once, outside the loops.
  const needle = query.trim().toLowerCase();

  if (needle.length > 0) {
    const index = await memory.get(`user:${userId}:docs`);
    const userDocs: Array<{ id: string }> = Array.isArray(index) ? index : [];

    // The lookups are independent, so fetch all documents together;
    // Promise.all keeps the index order.
    const documents: Array<StoredDocument | null> = await Promise.all(
      userDocs.map((doc) => memory.get(doc.id)),
    );

    for (const stored of documents) {
      if (!stored) continue;

      for (const section of stored.sections) {
        if (section.content.toLowerCase().includes(needle)) {
          results.push({
            documentId: stored.id,
            documentName: stored.name,
            section,
            score: 1,
          });
        }
      }
    }
  }

  return {
    action: "search",
    query,
    results: results.slice(0, 5),
    totalResults: results.length,
  };
}

/**
 * Returns a section-level summary of one of the user's documents.
 *
 * The document must be listed in the user's index (`user:<userId>:docs`);
 * otherwise it is reported as not found. This keeps one user from reading
 * another user's document, and keeps non-document keys from being treated as
 * documents. Each section summary is the first 150 characters of the section,
 * with "..." appended only when the section is longer than that.
 *
 * @param userId - User requesting the summary.
 * @param documentId - Id of the document, as returned by the upload action.
 * @returns Summary details, or `{ error: "Document not found" }` when the
 *   document is missing or not in the user's index.
 */
async function handleSummarize(userId: string, documentId: string) {
  const summaryChars = 150;

  // Ownership check: only ids present in this user's index may be read.
  const index = await memory.get(`user:${userId}:docs`);
  const ownsDocument =
    Array.isArray(index) &&
    index.some((entry: { id?: unknown } | null) => entry?.id === documentId);
  if (!ownsDocument) return { error: "Document not found" };

  const doc = (await memory.get(documentId)) as StoredDocument | null;
  if (!doc) return { error: "Document not found" };

  const totalChars = doc.sections.reduce((sum, s) => sum + s.content.length, 0);
  const sectionSummaries = doc.sections.map((s) =>
    s.content.length > summaryChars
      ? s.content.substring(0, summaryChars) + "..."
      : s.content,
  );

  return {
    action: "summarize",
    documentId,
    name: doc.name,
    sectionCount: doc.sections.length,
    totalCharacters: totalChars,
    sectionSummaries,
    uploadedAt: doc.uploadedAt,
  };
}

/**
 * Lists the documents recorded in a user's document index.
 *
 * Reads `user:<userId>:docs` from memory (treating a missing value as an empty
 * list) and returns only the `id`, `name`, and `uploadedAt` of each entry.
 *
 * @param userId - User whose index is read.
 * @returns The document entries and how many there are.
 */
async function handleList(userId: string) {
  const indexKey = `user:${userId}:docs`;
  const storedIndex = await memory.get(indexKey);
  const entries = storedIndex ?? [];

  // Copy just the three listing fields so any extra stored properties are not exposed.
  const documents = entries.map(
    ({ id, name, uploadedAt }: { id: string; name: string; uploadedAt: string }) => ({
      id,
      name,
      uploadedAt,
    }),
  );

  return {
    action: "list",
    documents,
    totalDocuments: entries.length,
  };
}

| Action | Description |
| ------ | ----------- |
| upload | Split a document into sections and store with memory indexing |
| search | Find sections across all documents matching a query |
| summarize | Return section-level summaries of a document |
| list | List all uploaded documents for a user |


Step 3: Add Workflow

novium workflow create doc-pipeline

Open workflows/doc-pipeline/workflow.ts:

// Workflow definition for the document pipeline: one agent step followed by a log step.
export default {
  // Declares an HTTP trigger for POST requests on the /doc path.
  trigger: { type: "http", method: "POST", path: "/doc" },
  steps: [
    {
      // Passes the request fields to the doc-assistant agent.
      id: "process-document",
      agent: "doc-assistant",
      // NOTE(review): the "$input.*" strings look like placeholders that are filled
      // from the incoming request payload; confirm against the workflow reference.
      input: {
        userId: "$input.userId",
        action: "$input.action",
        documentName: "$input.documentName",
        content: "$input.content",
        query: "$input.query",
        documentId: "$input.documentId",
      },
    },
    // Log step whose message includes the "$input.action" placeholder.
    { id: "log-result", action: "log", message: "Document action: $input.action" },
  ],
};
Trigger (HTTP POST /doc)
  ↓
process-document  ← doc-assistant
  ↓
log-result        ← log action type

Step 4: Run Locally

novium agent dev

Upload a document:

curl -X POST http://localhost:3000 \
  -H "Content-Type: application/json" \
  -d '{
    "userId": "user-1",
    "action": "upload",
    "documentName": "Q4 Report",
    "content": "Revenue grew 15% in Q4. Customer retention improved to 92%. The AI product line contributed 40% of new revenue. International expansion into 3 new markets is planned for Q1."
  }'
{
  "action": "uploaded",
  "documentId": "doc_1705310000000",
  "name": "Q4 Report",
  "sectionCount": 1,
  "preview": "Revenue grew 15% in Q4..."
}

Search the document:

curl -X POST http://localhost:3000 \
  -H "Content-Type: application/json" \
  -d '{"userId": "user-1", "action": "search", "query": "revenue"}'
{
  "action": "search",
  "query": "revenue",
  "results": [
    {
      "documentId": "doc_1705310000000",
      "documentName": "Q4 Report",
      "section": { "content": "Revenue grew 15% in Q4..." }
    }
  ],
  "totalResults": 1
}

Summarize:

curl -X POST http://localhost:3000 \
  -H "Content-Type: application/json" \
  -d '{"userId": "user-1", "action": "summarize", "documentId": "doc_1705310000000"}'
{
  "action": "summarize",
  "name": "Q4 Report",
  "sectionCount": 1,
  "totalCharacters": 192,
  "sectionSummaries": ["Revenue grew 15% in Q4..."]
}

Step 5: Deploy

novium deploy
✓ Deployed

  Agent "doc-assistant":
    Endpoint: https://ai-assistant.novium.cloud/doc-assistant

  Workflow "doc-pipeline":
    Endpoint: https://ai-assistant.novium.cloud/doc

Final Result

A document assistant that accepts documents, indexes them in memory, and supports search and summarization.


What You Learned

  • ✅ Structured memory with section indexing
  • ✅ Document parsing and storage
  • ✅ Multi-action agent (upload, search, summarize, list)
  • ✅ Cross-document keyword search
  • ✅ Document summarization from memory

Next Tutorial

Build a Coding Agent →