Vector Search (RAG)#

This guide covers how to build RAG (Retrieval-Augmented Generation) applications with Redis vector search.

Overview#

RedisVectorStore provides:

  • HNSW indexing for fast approximate nearest neighbor search

  • Metadata filtering to narrow search results

  • Automatic embedding generation using OpenAI

# Supply a placeholder API key so the snippets below can run out of the
# box; a real key already present in the environment always takes
# precedence over this default.
import os

os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

# Connection string for the Redis server that backs the vector store.
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")

Basic Usage#

from redis_openai_agents import RedisVectorStore

# Spin up a vector store named "knowledge-base" backed by the Redis
# instance at REDIS_URL; the index is created on first use.
store = RedisVectorStore(name="knowledge-base", redis_url=REDIS_URL)

print(f"Vector store '{store.name}' initialized")

Adding Documents#

# Seed the knowledge base with a handful of passages. Each entry carries
# metadata (source, category) that later enables filtered searches.
_raw_docs = [
    ("Redis is an open source, in-memory data store used as a database, cache, and message broker.", "docs", "overview"),
    ("Redis supports various data structures including strings, hashes, lists, sets, and sorted sets.", "docs", "data-structures"),
    ("Redis 8 ships with vector search built in.", "docs", "features"),
    ("The OpenAI Agents SDK provides a framework for building AI agents with tools, guardrails, and handoffs.", "tutorial", "agents"),
    ("Semantic caching stores LLM responses and returns them for similar queries, reducing API costs.", "tutorial", "caching"),
]

documents = [
    {"content": text, "source": source, "category": category}
    for text, source, category in _raw_docs
]

# Ingest everything at once; embeddings are generated automatically.
store.add_documents(documents)
print(f"Added {len(documents)} documents")

Searching#

# Plain semantic search: the three nearest documents for the query.
results = store.search("What is Redis?", k=3)

print("Search results for 'What is Redis?':\n")
for rank, hit in enumerate(results, 1):
    print(f"{rank}. (score: {hit['score']:.4f})")
    print(f"   {hit['content']}")
    print(f"   Source: {hit['metadata'].get('source')}")
    print()

# Narrow the candidate set with a metadata filter before ranking.
results = store.search(query="How does caching work?", k=3, filter={"source": "tutorial"})

print("Filtered search (source=tutorial):\n")
for hit in results:
    print(f"- {hit['content'][:80]}...")
    print(f"  Category: {hit['metadata'].get('category')}")

Building a RAG Agent#

from agents import Agent, Runner, function_tool

@function_tool
def search_knowledge_base(query: str, k: int = 3) -> str:
    """Search the knowledge base for relevant information.

    Args:
        query: The search query
        k: Number of results to return

    Returns:
        The retrieved passages joined into one context string, each
        prefixed with its source tag, or a fallback message when the
        search returns nothing.
    """
    results = store.search(query, k=k)

    if not results:
        return "No relevant information found."

    # NOTE(review): search results are read with dict subscripts
    # (result['content'], result['metadata']) everywhere else in this
    # guide; the original used attribute access (r.content) here, which
    # would raise AttributeError on dict results. Aligned to dict access.
    return "\n\n".join(
        f"[{r['metadata'].get('source', 'unknown')}]: {r['content']}"
        for r in results
    )

# Create RAG agent
rag_agent = Agent(
    name="rag-assistant",
    instructions="""You are a helpful assistant that answers questions using the knowledge base.
    Always search the knowledge base first before answering.
    Cite your sources when answering.""",
    tools=[search_knowledge_base]
)
# Test the RAG agent
result = await Runner.run(rag_agent, input="What data structures does Redis support?")
print(result.final_output)
# Another question
result = await Runner.run(rag_agent, input="How can I reduce LLM API costs?")
print(result.final_output)

Document Management#

# Add one document together with its metadata.
store.add_document(
    content="Redis Streams is a data structure for managing event streams.",
    metadata={"source": "docs", "category": "streaming"}
)

# Verify it was added by searching for it.
results = store.search("event streaming", k=1)
# NOTE(review): search results are read with dict subscripts
# (result['content']) everywhere else in this guide; the original used
# attribute access (results[0].content) here. Aligned for consistency.
print(f"Found: {results[0]['content']}")

# Report how many documents the store currently holds.
count = store.count()
print(f"Total documents: {count}")

Batch Operations#

# Generate ten small synthetic documents, cycling through three topics,
# then ingest them five at a time.
new_docs = []
for idx in range(10):
    topic = idx % 3
    new_docs.append({"content": f"Document {idx} about topic {topic}", "topic": str(topic)})

store.add_documents(new_docs, batch_size=5)
print(f"Added {len(new_docs)} documents in batches")

Caching embeddings#

Re-embedding identical text on every ingestion wastes CPU (local models) or API budget (hosted vectorizers). Pass an EmbeddingsCache to RedisVectorStore to memoize embeddings by (content, model_name) in Redis.

The cache is transparent — subsequent ingests of the same content pull the vector from Redis instead of invoking the vectorizer.

from redisvl.extensions.cache.embeddings import EmbeddingsCache

# Memoize embeddings in Redis for one day, keyed by (content, model).
emb_cache = EmbeddingsCache(name="rag_embeddings_cache", redis_url=REDIS_URL, ttl=86400)

# A store wired to the cache reuses stored vectors instead of
# re-invoking the vectorizer for content it has already seen.
cached_store = RedisVectorStore(
    name="rag_docs_cached",
    redis_url=REDIS_URL,
    embeddings_cache=emb_cache,
)

content = "Redis stores embeddings efficiently."
cached_store.add_documents([{"content": content, "metadata": {"id": 1}}])

# The embedding now lives in the cache, so future ingests of identical
# content fetch it from Redis rather than recomputing it.
hit = emb_cache.get(content=content, model_name=cached_store.vectorizer_model)
print(f"Embedding cached: {hit is not None}")

Best Practices#

1. Chunk Large Documents#

def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    """Split *text* into chunks of at most ``chunk_size`` characters,
    with consecutive chunks sharing ``overlap`` characters.

    Args:
        text: The text to split. An empty string yields an empty list.
        chunk_size: Maximum characters per chunk; must be positive.
        overlap: Characters shared between adjacent chunks; must be
            smaller than ``chunk_size``.

    Returns:
        The list of chunks, in order.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        # Otherwise `start` would never advance (infinite loop).
        raise ValueError("overlap must be smaller than chunk_size")

    chunks: list[str] = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # Text exhausted — stepping back by `overlap` here would
            # emit a spurious duplicate tail chunk.
            break
        start = end - overlap
    return chunks

2. Include Rich Metadata#

# Attach descriptive metadata (source, category, date, author) so that
# later searches can be filtered precisely.
_metadata = {
    "source": "company_docs",
    "category": "hr_policy",
    "date": "2025-01-01",
    "author": "HR Team",
}
store.add_document(content=content, metadata=_metadata)

3. Use Filters for Scoped Searches#

# Limit the search to documents tagged as HR policy.
results = store.search(query="vacation policy", filter={"category": "hr_policy"})

Cleanup#

# Drop every document from the vector store.
store.clear()
done_msg = "Vector store cleared!"
print(done_msg)