RAG with Vault
Build a retrieval-augmented generation pipeline with the Vault SDK. Embed your documents, store them in a vector database, retrieve relevant chunks at query time, and generate grounded responses.
Overview
1
Embed documents
Convert your document corpus into vector embeddings using the Vault embedding endpoint.
2
Store in a vector DB
Upsert embeddings into a vector database (Pinecone, pgvector, Qdrant).
3
Retrieve at query time
Embed the user query and retrieve the top-k most similar document chunks.
4
Generate with context
Pass the retrieved chunks as context to vault.infer() to produce a grounded response.
Embeddings
lib/embed.ts
import { vault } from '@/lib/vault';
/**
 * Embeds a list of document chunks with the `vault-embed-v1` model.
 *
 * One `vault.embed` request is started per chunk and all of them are awaited
 * together, so the returned vectors line up index-for-index with `chunks`.
 * If any single request rejects, the whole call rejects.
 *
 * @param chunks - Text chunks to embed.
 * @returns One embedding vector per input chunk, in input order.
 */
export async function embedDocuments(chunks: string[]): Promise<number[][]> {
  const pending = chunks.map((text) =>
    vault.embed({ model: 'vault-embed-v1', input: text }),
  );
  const responses = await Promise.all(pending);
  return responses.map(({ embedding }) => embedding);
}
/**
 * Embeds a single query string with the `vault-embed-v1` model, the same
 * model id that `embedDocuments` passes for document chunks.
 *
 * @param query - The user's query text.
 * @returns The `embedding` field of the `vault.embed` result.
 */
export async function embedQuery(query: string): Promise<number[]> {
  const { embedding } = await vault.embed({
    model: 'vault-embed-v1',
    input: query,
  });
  return embedding;
}
Retrieval
lib/retrieve.ts
import { embedQuery } from './embed';
import { vectorDb } from './vector-db'; // your DB client
/**
 * Retrieves the text of the stored chunks most similar to a query.
 *
 * The query is embedded with `embedQuery`, the vector database is asked for
 * the `topK` closest matches with metadata included, and the `text` metadata
 * field of each match is collected.
 *
 * Matches that carry no metadata, or whose `text` field is not a string, are
 * skipped instead of being asserted to `string`. The result may therefore hold
 * fewer than `topK` entries, but it never contains `undefined` or other
 * non-string values.
 *
 * @param query - The user's query text.
 * @param topK - Maximum number of matches to request (defaults to 5).
 * @returns The chunk texts of the usable matches, in the order the database
 *   returned them.
 */
export async function retrieve(query: string, topK = 5): Promise<string[]> {
  const queryEmbedding = await embedQuery(query);
  const results = await vectorDb.query({
    vector: queryEmbedding,
    topK,
    includeMetadata: true,
  });

  const texts: string[] = [];
  // Tolerate a response without a `matches` array rather than throwing.
  for (const match of results.matches ?? []) {
    // Metadata is optional on a match, and its fields are not type-checked at
    // runtime, so verify the value before treating it as chunk text.
    const text: unknown = match.metadata?.text;
    if (typeof text === 'string') {
      texts.push(text);
    }
  }
  return texts;
}
Generation
lib/generate.ts
import { vault } from '@/lib/vault';
/**
 * Produces an answer to `query` that is grounded in the supplied chunks.
 *
 * Each chunk is prefixed with its 1-based position in square brackets
 * ("[1] ...") and the numbered chunks are separated by a blank line. That
 * block and the question are placed in a prompt instructing the model to
 * answer only from the context, which is sent to `vault.infer` using the
 * `vault-3-pro` model with `maxTokens` set to 500.
 *
 * @param query - The user's question.
 * @param context - Retrieved chunk texts to ground the answer in.
 * @returns The `text` field of the inference result.
 */
export async function generate(query: string, context: string[]): Promise<string> {
  const numberedChunks = context.map((chunk, index) => `[${index + 1}] ${chunk}`);
  const contextBlock = numberedChunks.join('\n\n');

  // The continuation lines of the template start at column 0 on purpose: any
  // indentation inside the literal would become part of the prompt text. The
  // trailing backslash joins the first two lines into a single sentence run.
  const prompt = `Answer the question using only the provided context. \
If the context does not contain the answer, say so.
Context:
${contextBlock}
Question: ${query}`;

  const { text } = await vault.infer({
    model: 'vault-3-pro',
    prompt,
    maxTokens: 500,
  });
  return text;
}
Full Pipeline
app/api/rag/route.ts
import { retrieve } from '@/lib/retrieve';
import { generate } from '@/lib/generate';
/**
 * Route handler for POST requests (labelled `app/api/rag/route.ts` in this
 * guide): runs retrieval, then generation, for one question.
 *
 * Expects a JSON body of the form `{ "query": "<question>" }`. The body comes
 * from the client and is untrusted, so it is validated before use:
 * - a body that is not valid JSON yields a 400 response;
 * - a body without a non-empty string `query` yields a 400 response.
 *
 * @param req - The incoming request.
 * @returns On success, JSON `{ response, sources }` where `response` is the
 *   generated answer and `sources` is the number of retrieved chunks. On
 *   invalid input, JSON `{ error }` with status 400.
 */
export async function POST(req: Request) {
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // req.json() rejects when the payload is not parseable JSON.
    return Response.json({ error: 'Request body must be valid JSON.' }, { status: 400 });
  }

  // Valid JSON can still be null, a number, an array, etc., so check the
  // shape before reading `query`.
  const query =
    typeof body === 'object' && body !== null
      ? (body as { query?: unknown }).query
      : undefined;
  if (typeof query !== 'string' || query.trim() === '') {
    return Response.json({ error: '"query" must be a non-empty string.' }, { status: 400 });
  }

  const context = await retrieve(query);
  const response = await generate(query, context);
  return Response.json({ response, sources: context.length });
}
For production, add source citations by storing document IDs alongside embeddings and returning them with the retrieved chunks.