Skip to content

Vector Search in Cypher

Perform semantic search using Cypher procedures.

Vector Search Procedure

CALL db.vector.search(
    index_name,      // Name of vector index
    query_vector,    // Vector to search for
    k,               // Number of results
    options          // Optional configuration
)
YIELD node, score
from grafito.cypher import format_vector_literal

# Prepare query vector
query_vec = model.encode("python graph database").tolist()
vector_literal = format_vector_literal(query_vec, precision=8)

# Run vector search
cypher = f"""
    CALL db.vector.search('articles_vec', {vector_literal}, 10)
    YIELD node, score
    RETURN node.title, node.content, score
    ORDER BY score DESC
"""

results = db.execute(cypher)

With Label Filters

// Search only within specific labels
CALL db.vector.search(
    'articles_vec',
    [0.123, -0.456, ...],
    10,
    {labels: ['Article', 'Tutorial']}
)
YIELD node, score
RETURN node.title, score

With Property Filters

// Search with property constraints
CALL db.vector.search(
    'articles_vec',
    [0.123, -0.456, ...],
    10,
    {
        labels: ['Article'],
        properties: {published: true, language: 'en'}
    }
)
YIELD node, score
RETURN node.title, score

With Reranking

// Enable reranking with stored vectors
CALL db.vector.search(
    'articles_vec',
    [0.123, -0.456, ...],
    10,
    {rerank: true, candidate_multiplier: 3}
)
YIELD node, score
RETURN node.title, score

Custom Reranker

# Register custom reranker
def my_reranker(query_vector, candidates):
    # candidates: [{"id": int, "vector": [...], "score": float, "node": Node}, ...]
    re_ranked = []
    for c in candidates:
        # Custom scoring logic
        boost = 1.0
        if 'featured' in c['node'].labels:
            boost = 1.2
        re_ranked.append({
            'id': c['id'],
            'score': c['score'] * boost
        })
    return re_ranked

db.register_reranker('featured_boost', my_reranker)
// Use in Cypher
CALL db.vector.search(
    'articles_vec',
    [0.123, -0.456, ...],
    10,
    {reranker: 'featured_boost'}
)
YIELD node, score
RETURN node.title, score

Combining with Graph Patterns

# Hybrid: vector search + graph traversal
query_vec = model.encode("machine learning tutorials").tolist()
vector_literal = format_vector_literal(query_vec)

cypher = f"""
    // Stage 1: Vector search for candidates
    CALL db.vector.search('articles_vec', {vector_literal}, 20)
    YIELD node, score

    // Stage 2: Expand to authors
    MATCH (node)<-[:AUTHORED]-(author:Person)

    // Stage 3: Get author's other content
    OPTIONAL MATCH (author)-[:AUTHORED]->(other:Article)
    WHERE other <> node

    RETURN 
        node.title as matched_article,
        score,
        author.name as author,
        collect(DISTINCT other.title)[0..3] as other_works
    ORDER BY score DESC
    LIMIT 10
"""

results = db.execute(cypher)

With Path Expansion

// Vector search + related content
CALL db.vector.search('articles_vec', $query_vec, 5)
YIELD node, score

// Find related articles through shared tags
MATCH (node)-[:TAGGED]->(tag)<-[:TAGGED]-(related)
WHERE related <> node

RETURN 
    node.title as main_result,
    score,
    collect(DISTINCT related.title)[0..5] as related_articles
ORDER BY score DESC

Aggregating Vector Results

// Cluster search results by category
CALL db.vector.search('articles_vec', $query_vec, 50)
YIELD node, score

WITH node.category as category, 
     collect({node: node, score: score}) as items,
     avg(score) as avg_score

RETURN 
    category,
    count(*) as count,
    avg_score,
    items[0..3] as top_items
ORDER BY avg_score DESC

Dynamic Vector Queries

# Build vector queries dynamically
def vector_search_with_context(db, query, user_id, k=10):
    # Get user's interests
    interests = db.execute("""
        MATCH (u:User {id: $user_id})-[:INTERESTED_IN]->(topic)
        RETURN collect(topic.name) as interests
    """, {'user_id': user_id})

    # Build enriched query
    enriched_query = query + ' ' + ' '.join(interests[0]['interests'])
    query_vec = model.encode(enriched_query).tolist()

    # Search with personalization boost
    return db.execute("""
        CALL db.vector.search('articles_vec', $vec, $k)
        YIELD node, score

        // Boost if matches user interests
        OPTIONAL MATCH (node)-[:ABOUT]->(topic)
        WHERE topic.name IN $interests

        WITH node, score, count(topic) as interest_matches
        ORDER BY score + (interest_matches * 0.1) DESC

        RETURN node.title, node.summary, score
        LIMIT $k
    """, {'vec': query_vec, 'k': k, 'interests': interests[0]['interests']})

Error Handling

Unknown Index

DatabaseError: Vector index 'unknown_idx' not found

Solution: Check index name with SHOW INDEXES or db.list_vector_indexes().

Dimension Mismatch

DatabaseError: Query vector dimension 768 does not match index dimension 384

Solution: Ensure query vector has same dimension as index.

Unknown Reranker

CypherExecutionError: Unknown reranker 'invalid_name'

Solution: Register reranker first with db.register_reranker().

Best Practices

Use Precision Parameter

# Format vector for Cypher with appropriate precision
vector_literal = format_vector_literal(query_vec, precision=6)
# Higher precision = more accurate but longer query string

Limit Initial Results

// Get more candidates than needed for reranking
CALL db.vector.search('idx', $vec, 50)  // Get 50
YIELD node, score
// ... filter/process ...
RETURN ...
LIMIT 10  // Return top 10

Combine with Property Indexes

# Create property index for common filters
db.create_node_index('Article', 'published')
db.create_node_index('Article', 'language')

# Vector search will use these for pre-filtering

Cache Query Embeddings

from functools import lru_cache

@lru_cache(maxsize=1000)
def get_query_embedding(query):
    return model.encode(query).tolist()

# Reuse embeddings for repeated queries
vec = get_query_embedding("python graphql")