Vector Search (RAG)#
This guide covers how to build RAG (Retrieval-Augmented Generation) applications with Redis vector search.
Overview#
RedisVectorStore provides:
- HNSW indexing for fast approximate nearest neighbor search
- Metadata filtering to narrow search results
- Automatic embedding generation using OpenAI
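Set your OpenAI API key and Redis connection URL before running the examples: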
import os
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379")
Basic Usage#
from redis_openai_agents import RedisVectorStore
# Create a vector store
store = RedisVectorStore(
    name="knowledge-base",
    redis_url=REDIS_URL
)
print(f"Vector store '{store.name}' initialized")
Adding Documents#
# Add documents with metadata
documents = [
    {
        "content": "Redis is an open source, in-memory data store used as a database, cache, and message broker.",
        "source": "docs",
        "category": "overview"
    },
    {
        "content": "Redis supports various data structures including strings, hashes, lists, sets, and sorted sets.",
        "source": "docs",
        "category": "data-structures"
    },
    {
        "content": "Redis 8 ships with vector search built in.",
        "source": "docs",
        "category": "features"
    },
    {
        "content": "The OpenAI Agents SDK provides a framework for building AI agents with tools, guardrails, and handoffs.",
        "source": "tutorial",
        "category": "agents"
    },
    {
        "content": "Semantic caching stores LLM responses and returns them for similar queries, reducing API costs.",
        "source": "tutorial",
        "category": "caching"
    }
]
# Add to store (embeddings generated automatically)
store.add_documents(documents)
print(f"Added {len(documents)} documents")
Searching#
# Basic search
results = store.search("What is Redis?", k=3)
print("Search results for 'What is Redis?':\n")
for i, result in enumerate(results, 1):
    print(f"{i}. (score: {result['score']:.4f})")
    print(f"   {result['content']}")
    print(f"   Source: {result['metadata'].get('source')}")
    print()
# Search with metadata filter
results = store.search(
    query="How does caching work?",
    k=3,
    filter={"source": "tutorial"}
)
print("Filtered search (source=tutorial):\n")
for result in results:
    print(f"- {result['content'][:80]}...")
    print(f"  Category: {result['metadata'].get('category')}")
Building a RAG Agent#
from agents import Agent, Runner, function_tool
@function_tool
def search_knowledge_base(query: str, k: int = 3) -> str:
    """Search the knowledge base for relevant information.

    Args:
        query: The search query
        k: Number of results to return
    """
    results = store.search(query, k=k)
    if not results:
        return "No relevant information found."
    # Results are dicts (as in the Searching section above), so use key access
    context = "\n\n".join([
        f"[{r['metadata'].get('source', 'unknown')}]: {r['content']}"
        for r in results
    ])
    return context
# Create RAG agent
rag_agent = Agent(
    name="rag-assistant",
    instructions="""You are a helpful assistant that answers questions using the knowledge base.
    Always search the knowledge base first before answering.
    Cite your sources when answering.""",
    tools=[search_knowledge_base]
)
# Test the RAG agent
result = await Runner.run(rag_agent, input="What data structures does Redis support?")
print(result.final_output)
# Another question
result = await Runner.run(rag_agent, input="How can I reduce LLM API costs?")
print(result.final_output)
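In both runs the agent calls search_knowledge_base before answering and cites the bracketed source tags from the returned context.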
Document Management#
# Add single document
store.add_document(
    content="Redis Streams is a data structure for managing event streams.",
    metadata={"source": "docs", "category": "streaming"}
)
# Verify it was added
results = store.search("event streaming", k=1)
print(f"Found: {results[0].content}")
# Get document count
count = store.count()
print(f"Total documents: {count}")
Batch Operations#
# Batch add documents
new_docs = [
    {"content": f"Document {i} about topic {i % 3}", "topic": str(i % 3)}
    for i in range(10)
]
store.add_documents(new_docs, batch_size=5)
print(f"Added {len(new_docs)} documents in batches")
Caching Embeddings#
Re-embedding identical text on every ingestion wastes CPU (local models) or
API budget (hosted vectorizers). Pass an EmbeddingsCache to
RedisVectorStore to memoize embeddings by (content, model_name) in Redis.
The cache is transparent: subsequent ingests of the same content pull the vector from Redis instead of invoking the vectorizer.
from redisvl.extensions.cache.embeddings import EmbeddingsCache
emb_cache = EmbeddingsCache(
    name="rag_embeddings_cache",
    redis_url=REDIS_URL,
    ttl=86400,  # 1 day
)
cached_store = RedisVectorStore(
    name="rag_docs_cached",
    redis_url=REDIS_URL,
    embeddings_cache=emb_cache,
)
content = "Redis stores embeddings efficiently."
cached_store.add_documents([{"content": content, "id": "1"}])
# The vector is now in the cache; subsequent adds of the same content
# skip the vectorizer entirely.
hit = emb_cache.get(text=content, model_name=cached_store.vectorizer_model)
print(f"Embedding cached: {hit is not None}")
Best Practices#
1. Chunk Large Documents#
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    """Split text into fixed-size chunks with a sliding overlap.

    Overlapping chunks preserve context that would otherwise be cut
    at chunk boundaries. Keep overlap well below chunk_size.
    """
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap  # step back so adjacent chunks share context
    return chunks
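For example, chunk a long document before ingesting it, tagging each chunk with its position (the field names here are illustrative):
long_text = "Redis is an in-memory data store. " * 100  # stand-in for a long document
chunks = chunk_text(long_text)
store.add_documents([
    {"content": chunk, "source": "long_doc", "chunk": str(i)}
    for i, chunk in enumerate(chunks)
])
print(f"Ingested {len(chunks)} chunks")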
2. Include Rich Metadata#
store.add_document(
    content=content,
    metadata={
        "source": "company_docs",
        "category": "hr_policy",
        "date": "2025-01-01",
        "author": "HR Team"
    }
)
3. Use Filters for Scoped Searches#
# Search only HR documents
results = store.search(
    query="vacation policy",
    filter={"category": "hr_policy"}
)
Cleanup#
# Clear the vector store
store.clear()
print("Vector store cleared!")