Vector Search in Cypher
Perform semantic search using Cypher procedures.
Vector Search Procedure
CALL db.vector.search(
index_name, // Name of vector index
query_vector, // Vector to search for
k, // Number of results
options // Optional configuration
)
YIELD node, score
Basic Vector Search
from grafito.cypher import format_vector_literal
# Prepare query vector
query_vec = model.encode("python graph database").tolist()
vector_literal = format_vector_literal(query_vec, precision=8)
# Run vector search
cypher = f"""
CALL db.vector.search('articles_vec', {vector_literal}, 10)
YIELD node, score
RETURN node.title, node.content, score
ORDER BY score DESC
"""
results = db.execute(cypher)
With Label Filters
// Search only within specific labels
CALL db.vector.search(
'articles_vec',
[0.123, -0.456, ...],
10,
{labels: ['Article', 'Tutorial']}
)
YIELD node, score
RETURN node.title, score
With Property Filters
// Search with property constraints
CALL db.vector.search(
'articles_vec',
[0.123, -0.456, ...],
10,
{
labels: ['Article'],
properties: {published: true, language: 'en'}
}
)
YIELD node, score
RETURN node.title, score
With Reranking
// Enable reranking with stored vectors
CALL db.vector.search(
'articles_vec',
[0.123, -0.456, ...],
10,
{rerank: true, candidate_multiplier: 3}
)
YIELD node, score
RETURN node.title, score
Custom Reranker
# Register custom reranker
def my_reranker(query_vector, candidates):
# candidates: [{"id": int, "vector": [...], "score": float, "node": Node}, ...]
re_ranked = []
for c in candidates:
# Custom scoring logic
boost = 1.0
if 'featured' in c['node'].labels:
boost = 1.2
re_ranked.append({
'id': c['id'],
'score': c['score'] * boost
})
return re_ranked
db.register_reranker('featured_boost', my_reranker)
// Use in Cypher
CALL db.vector.search(
'articles_vec',
[0.123, -0.456, ...],
10,
{reranker: 'featured_boost'}
)
YIELD node, score
RETURN node.title, score
Combining with Graph Patterns
# Hybrid: vector search + graph traversal
query_vec = model.encode("machine learning tutorials").tolist()
vector_literal = format_vector_literal(query_vec)
cypher = f"""
// Stage 1: Vector search for candidates
CALL db.vector.search('articles_vec', {vector_literal}, 20)
YIELD node, score
// Stage 2: Expand to authors
MATCH (node)<-[:AUTHORED]-(author:Person)
// Stage 3: Get author's other content
OPTIONAL MATCH (author)-[:AUTHORED]->(other:Article)
WHERE other <> node
RETURN
node.title as matched_article,
score,
author.name as author,
collect(DISTINCT other.title)[0..3] as other_works
ORDER BY score DESC
LIMIT 10
"""
results = db.execute(cypher)
With Path Expansion
// Vector search + related content
CALL db.vector.search('articles_vec', $query_vec, 5)
YIELD node, score
// Find related articles through shared tags
MATCH (node)-[:TAGGED]->(tag)<-[:TAGGED]-(related)
WHERE related <> node
RETURN
node.title as main_result,
score,
collect(DISTINCT related.title)[0..5] as related_articles
ORDER BY score DESC
Aggregating Vector Results
// Cluster search results by category
CALL db.vector.search('articles_vec', $query_vec, 50)
YIELD node, score
WITH node.category as category,
collect({node: node, score: score}) as items,
avg(score) as avg_score
RETURN
category,
count(*) as count,
avg_score,
items[0..3] as top_items
ORDER BY avg_score DESC
Dynamic Vector Queries
# Build vector queries dynamically
def vector_search_with_context(db, query, user_id, k=10):
# Get user's interests
interests = db.execute("""
MATCH (u:User {id: $user_id})-[:INTERESTED_IN]->(topic)
RETURN collect(topic.name) as interests
""", {'user_id': user_id})
# Build enriched query
enriched_query = query + ' ' + ' '.join(interests[0]['interests'])
query_vec = model.encode(enriched_query).tolist()
# Search with personalization boost
return db.execute("""
CALL db.vector.search('articles_vec', $vec, $k)
YIELD node, score
// Boost if matches user interests
OPTIONAL MATCH (node)-[:ABOUT]->(topic)
WHERE topic.name IN $interests
WITH node, score, count(topic) as interest_matches
ORDER BY score + (interest_matches * 0.1) DESC
RETURN node.title, node.summary, score
LIMIT $k
""", {'vec': query_vec, 'k': k, 'interests': interests[0]['interests']})
Error Handling
Unknown Index
Solution: Check index name with SHOW INDEXES or db.list_vector_indexes().
Dimension Mismatch
Solution: Ensure query vector has same dimension as index.
Unknown Reranker
Solution: Register reranker first with db.register_reranker().
Best Practices
Use Precision Parameter
# Format vector for Cypher with appropriate precision
vector_literal = format_vector_literal(query_vec, precision=6)
# Higher precision = more accurate but longer query string
Limit Initial Results
// Get more candidates than needed for reranking
CALL db.vector.search('idx', $vec, 50) // Get 50
YIELD node, score
// ... filter/process ...
RETURN ...
LIMIT 10 // Return top 10
Combine with Property Indexes
# Create property index for common filters
db.create_node_index('Article', 'published')
db.create_node_index('Article', 'language')
# Vector search will use these for pre-filtering