Hybrid Search#
This guide covers combining vector similarity with BM25 full-text search for better retrieval.
Overview#
Hybrid search combines:
- Vector Search - Semantic similarity using embeddings
- BM25 Search - Keyword matching using full-text search
This provides better retrieval by capturing both semantic meaning and exact keyword matches.
import os
# Install a placeholder OpenAI key only when the environment has none, so a
# key already exported by the user is never overwritten.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = "your-api-key-here"

# Redis connection URL; falls back to a local instance on the default port.
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")
HybridSearchService#
from redis_openai_agents import HybridSearchService
# Build the hybrid search service that the rest of this guide queries,
# connecting it to the Redis instance named by REDIS_URL.
search = HybridSearchService(name="hybrid-docs", redis_url=REDIS_URL)

print(f"Hybrid search service '{search.name}' initialized")
Adding Documents#
# Sample corpus as (title, category, content) rows. Each row is expanded into
# the dict shape handed to add_documents; per this guide, added documents are
# indexed for both vector and text search.
corpus_rows = [
    (
        "Introduction to Redis",
        "overview",
        "Redis is an open-source, in-memory data structure store that can be used as a database, cache, and message broker.",
    ),
    (
        "Redis 8 Features",
        "features",
        "Redis 8 extends the Redis core with JSON, Time Series, and Search capabilities built in.",
    ),
    (
        "RediSearch Full-Text",
        "search",
        "The RediSearch module provides full-text search with features like stemming, phonetic matching, and fuzzy search.",
    ),
    (
        "Vector Search in Redis",
        "search",
        "Vector similarity search in Redis uses the HNSW algorithm for efficient approximate nearest neighbor queries.",
    ),
    (
        "Semantic Caching",
        "caching",
        "Semantic caching uses vector embeddings to find similar queries and return cached responses.",
    ),
]

# Key order (content, title, category) is kept the same for every document.
documents = [
    {"content": content, "title": title, "category": category}
    for title, category, content in corpus_rows
]

search.add_documents(documents)
print(f"Added {len(documents)} documents")
Hybrid Search#
# Run three hybrid queries that differ only in how the vector and text scores
# are weighted, printing the ranked titles and scores for each.

# 1) No explicit weights: uses the service defaults, which this guide
#    describes as 50% vector / 50% text.
results = search.search(query="Redis full-text search capabilities", k=3)

print("Hybrid search results (50/50 weights):\n")
for rank, hit in enumerate(results, start=1):
    print(f"{rank}. {hit.title} (score: {hit.score:.4f})")
    # Show only the first 80 characters of the content as a preview.
    print(f" {hit.content[:80]}...")
    print()

# 2) Semantic-heavy: most of the score comes from vector similarity.
semantic_weights = {"vector_weight": 0.8, "text_weight": 0.2}
results = search.search(
    query="How can I find similar items?",
    k=3,
    **semantic_weights,
)

print("Semantic-heavy search (80% vector, 20% text):\n")
for rank, hit in enumerate(results, start=1):
    print(f"{rank}. {hit.title} (score: {hit.score:.4f})")

# 3) Keyword-heavy: most of the score comes from full-text matching.
keyword_weights = {"vector_weight": 0.2, "text_weight": 0.8}
results = search.search(
    query="HNSW algorithm",
    k=3,
    **keyword_weights,
)

print("Keyword-heavy search (20% vector, 80% text):\n")
for rank, hit in enumerate(results, start=1):
    print(f"{rank}. {hit.title} (score: {hit.score:.4f})")
Individual Search Components#
from redis_openai_agents import RedisVectorStore, RedisFullTextSearch
# Stand-alone services for each retrieval mode, so their rankings can be
# compared against the hybrid service on the same query.
vector_store = RedisVectorStore(name="vector-only", redis_url=REDIS_URL)
text_search = RedisFullTextSearch(name="text-only", redis_url=REDIS_URL)

# Index the same corpus in both stores, keeping only the title as metadata.
# Each call gets its own metadata dict so the two stores never share one.
for entry in documents:
    body = entry["content"]
    vector_store.add_document(body, metadata={"title": entry["title"]})
    text_search.add_document(body, metadata={"title": entry["title"]})

# Run one query through all three services and print the results side by side.
query = "in-memory database"
print(f"Query: '{query}'\n")

# Vector search (semantic)
vector_results = vector_store.search(query, k=2)
print("Vector Search (semantic):")
for hit in vector_results:
    print(f" - {hit.metadata.get('title')} ({hit.score:.4f})")

# Text search (keyword)
text_results = text_search.search(query, k=2)
print("\nText Search (keyword):")
for hit in text_results:
    print(f" - {hit.metadata.get('title')} ({hit.score:.4f})")

# Hybrid search; these results expose the title as an attribute rather than
# through the metadata dict.
hybrid_results = search.search(query, k=2)
print("\nHybrid Search (combined):")
for hit in hybrid_results:
    print(f" - {hit.title} ({hit.score:.4f})")
Filtering#
# Hybrid search restricted by a metadata filter: only the "search" category
# is requested.
category_filter = {"category": "search"}
results = search.search(query="search capabilities", k=3, filter=category_filter)

print("Filtered search (category=search):\n")
for hit in results:
    # Print each hit's category so the effect of the filter is visible.
    print(f"- {hit.title}: {hit.metadata.get('category')}")
When to Use Each Approach#
| Scenario | Recommended Approach |
|---|---|
| User asks a question in natural language | Heavy vector weight (0.7-0.8) |
| User searches for specific terms/names | Heavy text weight (0.7-0.8) |
| General search with unknown intent | Balanced (0.5/0.5) |
| Technical documentation search | Balanced or text-heavy |
| Conversational Q&A | Vector-heavy |
def adaptive_search(query: str, k: int = 3):
    """Run a hybrid search whose weights adapt to the query text.

    A query that mentions a known technical term is treated as a keyword
    lookup and weighted 0.3 vector / 0.7 text. Any other query is treated as
    natural language and weighted 0.7 vector / 0.3 text.

    Args:
        query: The user's search string.
        k: Number of results to request from the search service.

    Returns:
        Whatever ``search.search`` returns for the chosen weights.
    """
    import re  # local import keeps this snippet runnable on its own

    # Simple heuristic: specific technical terms suggest a keyword lookup.
    technical_terms = ["HNSW", "BM25", "API", "SDK", "JSON", "Redis"]

    # Match whole words only (an optional plural "s" is allowed). A plain
    # substring test would classify "rapid" or "capital" as technical because
    # both contain "api".
    term_pattern = r"\b(?:" + "|".join(map(re.escape, technical_terms)) + r")s?\b"
    has_technical = re.search(term_pattern, query, flags=re.IGNORECASE) is not None

    if has_technical:
        # Technical query - favor keyword matching
        vector_weight, text_weight = 0.3, 0.7
    else:
        # Natural language - favor semantic matching
        vector_weight, text_weight = 0.7, 0.3

    return search.search(
        query=query,
        k=k,
        vector_weight=vector_weight,
        text_weight=text_weight,
    )
# Test adaptive search with one query of each kind.
queries = [
    "How can I store data quickly?",  # Natural language → vector-heavy
    "HNSW algorithm implementation",  # Technical → text-heavy
]

for q in queries:
    results = adaptive_search(q, k=1)
    print(f"Query: '{q}'")
    # Guard the lookup: indexing results[0] on an empty result set would
    # raise IndexError and abort the remaining queries.
    if results:
        print(f" Best match: {results[0].title}\n")
    else:
        print(" Best match: (no results)\n")
Best Practices#
1. Tune Weights for Your Use Case#
Start with 50/50 and adjust based on retrieval quality.
2. Use Query Analysis#
Analyze queries to dynamically adjust weights.
3. Include Rich Text Fields#
# Template only: main_text, title and summary are placeholders that must be
# replaced with your own values before this snippet will run.
rich_document = {
    "content": main_text,
    "title": title,
    "keywords": "redis vector search hnsw",  # Boost keyword matching
    "summary": summary,
}
search.add_documents([rich_document])
Cleanup#
# Clean up: call clear() on each service created in this guide, in the order
# they were created.
for service in (search, vector_store, text_search):
    service.clear()

print("Search indices cleared!")