Skip to content
Open
168 changes: 168 additions & 0 deletions examples/rag/postgres-hybrid.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Tools\DsnParser;
use Symfony\AI\Fixtures\Movies;
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
use Symfony\AI\Store\Bridge\Postgres\HybridStore;
use Symfony\AI\Store\Bridge\Postgres\ReciprocalRankFusion;
use Symfony\AI\Store\Bridge\Postgres\TextSearch\Bm25TextSearchStrategy;
use Symfony\AI\Store\Bridge\Postgres\TextSearch\PostgresTextSearchStrategy;
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Exception\RuntimeException;
use Symfony\AI\Store\Indexer;
use Symfony\Component\Uid\Uuid;

// Load the examples bootstrap; presumably it provides the env(), http_client()
// and logger() helpers called in this script — TODO confirm.
require_once dirname(__DIR__).'/bootstrap.php';

// Banner describing what this demo covers.
echo "=== PostgreSQL Hybrid Search Demo ===\n\n"
    ."Demonstrates HybridStore with configurable search strategies:\n"
    ."- Native PostgreSQL FTS vs BM25\n"
    ."- Semantic ratio adjustment\n"
    ."- Custom RRF scoring\n\n";

// Open a Doctrine DBAL connection from POSTGRES_URI and unwrap the native driver handle.
$connectionParams = (new DsnParser())->parse(env('POSTGRES_URI'));
$connection = DriverManager::getConnection($connectionParams);
$pdo = $connection->getNativeConnection();

// The stores below are handed this handle directly, so anything but PDO is rejected up front.
if (!$pdo instanceof PDO) {
    throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL.');
}

echo "=== Using BM25 Text Search Strategy ===\n\n";

// First store: table "hybrid_movies", BM25 text search strategy ("en"),
// RRF with k = 60 and normalized scores, default semantic ratio 0.5.
$bm25Strategy = new Bm25TextSearchStrategy('en');
$rankFusion = new ReciprocalRankFusion(k: 60, normalizeScores: true);

$store = new HybridStore(
    connection: $pdo,
    tableName: 'hybrid_movies',
    textSearchStrategy: $bm25Strategy,
    rrf: $rankFusion,
    semanticRatio: 0.5,
);

// Build one TextDocument per fixture movie (embeddings are created later, at indexing time).
// The same text serves as the document content and as the "content" metadata entry,
// so it is composed once per movie instead of being concatenated twice.
$documents = [];
foreach (Movies::all() as $movie) {
    $content = 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'];

    $documents[] = new TextDocument(
        id: Uuid::v4(),
        content: $content,
        metadata: new Metadata(array_merge($movie, ['content' => $content])),
    );
}

// Initialize the store's table before anything is written to it.
$store->setup();

// Wire the OpenAI platform into a vectorizer ("text-embedding-3-small").
$platform = PlatformFactory::create(env('OPENAI_API_KEY'), http_client());
$vectorizer = new Vectorizer($platform, 'text-embedding-3-small', logger());

// Index the in-memory documents into the store through that vectorizer.
$loader = new InMemoryLoader($documents);
$indexer = new Indexer($loader, $vectorizer, $store, logger: logger());
$indexer->index($documents);

// Text that is embedded once and reused as the vector side of the queries below.
$queryText = 'futuristic technology and artificial intelligence';
printf("Query: \"%s\"\n\n", $queryText);
$queryEmbedding = $vectorizer->vectorize($queryText);

// Scenarios to compare; each one overrides the semantic ratio for a single query.
$ratios = [
    ['ratio' => 0.0, 'description' => '100% Full-text search (keyword matching)'],
    ['ratio' => 0.5, 'description' => 'Balanced hybrid (RRF: 50% semantic + 50% FTS)'],
    ['ratio' => 1.0, 'description' => '100% Semantic search (vector similarity)'],
];

foreach ($ratios as ['ratio' => $semanticRatio, 'description' => $label]) {
    echo '--- '.$label." ---\n";

    $queryOptions = [
        'semanticRatio' => $semanticRatio, // per-query override of the ratio
        'q' => 'technology', // full-text search keyword
        'limit' => 3,
    ];
    $results = $store->query($queryEmbedding, $queryOptions);

    echo "Top 3 results:\n";
    foreach ($results as $position => $hit) {
        // The displayed rank is derived from the result key, exactly as returned by the store.
        $title = $hit->metadata->getArrayCopy()['title'] ?? 'Unknown';
        printf(" %d. %s (Score: %.4f)\n", $position + 1, $title, $hit->score ?? 0.0);
    }
    echo "\n";
}

// Vector-only query: semanticRatio is 1.0 and no 'q' keyword is passed.
// NOTE(review): the "Query:" label printed here is a paraphrase; the text actually
// embedded is the string given to vectorize() below.
echo "--- Custom query with pure semantic search ---\n"
    ."Query: Movies about space exploration\n";

$spaceEmbedding = $vectorizer->vectorize('space exploration and cosmic adventures');
$semanticOnlyOptions = [
    'semanticRatio' => 1.0,
    'limit' => 3,
];
$results = $store->query($spaceEmbedding, $semanticOnlyOptions);

echo "Top 3 results:\n";
foreach ($results as $position => $hit) {
    $title = $hit->metadata->getArrayCopy()['title'] ?? 'Unknown';
    printf(" %d. %s (Score: %.4f)\n", $position + 1, $title, $hit->score ?? 0.0);
}
echo "\n";

// Cleanup: drop the first store now that its queries are done.
$store->drop();

echo "=== Comparing with Native PostgreSQL FTS ===\n\n";

// Second store: same PDO handle, separate table, PostgresTextSearchStrategy
// instead of BM25, and no explicit RRF argument.
$ftsStrategy = new PostgresTextSearchStrategy();
$storeFts = new HybridStore(
    connection: $pdo,
    tableName: 'hybrid_movies_fts',
    textSearchStrategy: $ftsStrategy,
    semanticRatio: 0.5,
);

// Set up the table and index the same documents with the same vectorizer.
$storeFts->setup();
$ftsLoader = new InMemoryLoader($documents);
$indexer = new Indexer($ftsLoader, $vectorizer, $storeFts, logger: logger());
$indexer->index($documents);

// Re-run the balanced (0.5) query from the first part against this store.
$ftsQueryOptions = [
    'semanticRatio' => 0.5,
    'q' => 'technology',
    'limit' => 3,
];
$resultsFts = $storeFts->query($queryEmbedding, $ftsQueryOptions);

echo "Top 3 results (Native FTS):\n";
foreach ($resultsFts as $position => $hit) {
    $title = $hit->metadata->getArrayCopy()['title'] ?? 'Unknown';
    printf(" %d. %s (Score: %.4f)\n", $position + 1, $title, $hit->score ?? 0.0);
}

// Cleanup: drop the second store as well.
$storeFts->drop();

// Closing recap of the options demonstrated above.
echo "\n=== Summary ===\n"
    ."- semanticRatio = 0.0: Pure keyword matching\n"
    ."- semanticRatio = 0.5: Balanced hybrid (RRF)\n"
    ."- semanticRatio = 1.0: Pure semantic search\n"
    ."\nText Search Strategies:\n"
    ."- PostgresTextSearchStrategy: Native FTS (ts_rank_cd)\n"
    ."- Bm25TextSearchStrategy: BM25 ranking (requires pg_bm25 extension)\n";
102 changes: 102 additions & 0 deletions src/ai-bundle/config/options.php
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,108 @@
->end()
->end()
->end()
// "postgres_hybrid" store type: a map of named store configurations
// (the map key is exposed as the "name" attribute).
->arrayNode('postgres_hybrid')
->info('PostgreSQL Hybrid Search combining pgvector (semantic) and Full-Text Search (lexical) using RRF')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
// Connection source: "connection", "dsn" and "dbal_connection" are mutually
// exclusive and one of them must be set (enforced by the validate() rules at the end).
->stringNode('connection')->cannotBeEmpty()->end()
->stringNode('dsn')->cannotBeEmpty()->end()
->stringNode('username')->end()
->stringNode('password')->end()
// table_name is the only node at this level that is marked as required.
->stringNode('table_name')->isRequired()->end()
->stringNode('vector_field')->defaultValue('embedding')->end()
->stringNode('content_field')->defaultValue('content')->end()
// NOTE(review): the default of 1.0 means "pure semantic" according to the info text
// below — confirm this is the intended default for a hybrid store.
->floatNode('semantic_ratio')
->info('Ratio between semantic (vector) and keyword (FTS) search. 0.0 = pure FTS, 0.5 = balanced, 1.0 = pure semantic')
->defaultValue(1.0)
->min(0.0)
->max(1.0)
->end()
// NOTE(review): PostgresDistance has to be imported in this file; the import
// is outside the visible hunk — confirm.
->enumNode('distance')
->info('Distance metric to use for vector similarity search')
->enumFqcn(PostgresDistance::class)
->defaultValue(PostgresDistance::L2)
->end()
// Two separate language settings: a PostgreSQL text search configuration name
// and a BM25 language code.
->stringNode('language')
->info('PostgreSQL text search configuration (e.g., "simple", "english", "french"). Default: "simple" (multilingual)')
->defaultValue('simple')
->end()
->stringNode('bm25_language')
->info('BM25 language code for stemming (e.g., "en", "fr", "es", "de", "it", "pt", "nl", "ru", "ar", "zh"). Default: "en"')
->defaultValue('en')
->end()
// RRF constant; must be at least 1.
->integerNode('rrf_k')
->info('RRF (Reciprocal Rank Fusion) constant. Higher = more equal weighting. Default: 60 (Supabase)')
->defaultValue(60)
->min(1)
->end()
// Optional score thresholds; both default to null.
->floatNode('default_max_score')
->info('Default maximum distance threshold for filtering results (optional)')
->defaultNull()
->end()
->floatNode('default_min_score')
->info('Default minimum RRF score threshold for filtering results (optional)')
->defaultNull()
->end()
->booleanNode('normalize_scores')
->info('Normalize scores to 0-100 range for better readability')
->defaultTrue()
->end()
// Fuzzy-matching tuning; each of the four values is constrained to [0.0, 1.0].
->floatNode('fuzzy_primary_threshold')
->info('Primary threshold for fuzzy matching (pg_trgm word_similarity). Higher = stricter. Default: 0.25')
->defaultValue(0.25)
->min(0.0)
->max(1.0)
->end()
->floatNode('fuzzy_secondary_threshold')
->info('Secondary threshold for fuzzy matching with double validation. Catches more typos. Default: 0.2')
->defaultValue(0.2)
->min(0.0)
->max(1.0)
->end()
->floatNode('fuzzy_strict_threshold')
->info('Strict similarity threshold for double validation to eliminate false positives. Default: 0.15')
->defaultValue(0.15)
->min(0.0)
->max(1.0)
->end()
->floatNode('fuzzy_weight')
->info('Weight of fuzzy matching vs FTS in hybrid search. 0.0 = disabled, 0.5 = equal (recommended), 1.0 = fuzzy only')
->defaultValue(0.5)
->min(0.0)
->max(1.0)
->end()
// Map of attribute name => {boost, metadata_key}; metadata_key is mandatory
// for every attribute, boost defaults to 1.0 and cannot be negative.
->arrayNode('searchable_attributes')
->info('Searchable attributes with field-specific boosting (similar to Meilisearch). Each attribute creates a separate tsvector column.')
->useAttributeAsKey('name')
->arrayPrototype()
->children()
->floatNode('boost')
->info('Boost multiplier for this field (e.g., 2.0 = twice as important). Default: 1.0')
->defaultValue(1.0)
->min(0.0)
->end()
->scalarNode('metadata_key')
->info('JSON path to extract value from metadata (e.g., "title", "description")')
->isRequired()
->cannotBeEmpty()
->end()
->end()
->end()
->end()
->stringNode('dbal_connection')->cannotBeEmpty()->end()
->end()
// Reject a store that defines none of the three connection sources.
->validate()
->ifTrue(static fn ($v) => !isset($v['dsn']) && !isset($v['dbal_connection']) && !isset($v['connection']))
->thenInvalid('Either "dsn", "dbal_connection", or "connection" must be configured.')
->end()
// Reject a store that defines more than one of them.
->validate()
->ifTrue(static fn ($v) => (int) isset($v['dsn']) + (int) isset($v['dbal_connection']) + (int) isset($v['connection']) > 1)
->thenInvalid('Only one of "dsn", "dbal_connection", or "connection" can be configured.')
->end()
->end()
->end()
->end()
->end()
->arrayNode('message_store')
Expand Down
108 changes: 108 additions & 0 deletions src/ai-bundle/src/AiBundle.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
use Symfony\AI\Store\Bridge\MongoDb\Store as MongoDbStore;
use Symfony\AI\Store\Bridge\Neo4j\Store as Neo4jStore;
use Symfony\AI\Store\Bridge\Pinecone\Store as PineconeStore;
use Symfony\AI\Store\Bridge\Postgres\HybridStore;
use Symfony\AI\Store\Bridge\Postgres\Store as PostgresStore;
use Symfony\AI\Store\Bridge\Qdrant\Store as QdrantStore;
use Symfony\AI\Store\Bridge\Redis\Store as RedisStore;
Expand Down Expand Up @@ -1366,6 +1367,113 @@ private function processStoreConfig(string $type, array $stores, ContainerBuilde
}
}

if ('postgres_hybrid' === $type) {
    // Optional settings forwarded to HybridStore as positional constructor arguments,
    // keyed by argument index (index 0 is the PDO connection, index 1 the table name).
    // NOTE(review): HybridStore's constructor is not visible from here — confirm that
    // these positions match its parameter order.
    $optionalArgumentKeys = [
        2 => 'vector_field',
        3 => 'content_field',
        4 => 'semantic_ratio',
        5 => 'distance',
        6 => 'language',
        7 => 'rrf_k',
        8 => 'default_max_score',
        9 => 'default_min_score',
        10 => 'normalize_scores',
        11 => 'fuzzy_primary_threshold',
        12 => 'fuzzy_secondary_threshold',
        13 => 'fuzzy_strict_threshold',
        14 => 'fuzzy_weight',
        15 => 'searchable_attributes',
        16 => 'bm25_language',
    ];

    foreach ($stores as $name => $store) {
        // Resolve the PDO argument: "connection" wins, then "dbal_connection", otherwise "dsn".
        if (\array_key_exists('connection', $store)) {
            // Reference to an existing PDO service; a leading "@" in the id is stripped.
            $pdoArgument = new Reference(ltrim($store['connection'], '@'));
        } elseif (\array_key_exists('dbal_connection', $store)) {
            // Inline PDO definition built by calling getNativeConnection() on the DBAL connection service.
            $pdoArgument = new Definition(\PDO::class);
            $pdoArgument->setFactory([new Reference($store['dbal_connection']), 'getNativeConnection']);
        } else {
            // Inline PDO definition constructed from the DSN and the optional credentials.
            $pdoArgument = new Definition(\PDO::class, [
                $store['dsn'],
                $store['username'] ?? null,
                $store['password'] ?? null,
            ]);
        }

        $arguments = [$pdoArgument, $store['table_name']];

        // Copy every configured optional setting to its constructor position.
        foreach ($optionalArgumentKeys as $index => $key) {
            if (\array_key_exists($key, $store)) {
                $arguments[$index] = $store[$key];
            }
        }

        $serviceId = 'ai.store.'.$type.'.'.$name;

        $container->setDefinition(
            $serviceId,
            (new Definition(HybridStore::class))
                ->addTag('ai.store')
                ->setArguments($arguments),
        );

        // Register StoreInterface argument aliases under both "<name>" and "<type>_<name>".
        $container->registerAliasForArgument($serviceId, StoreInterface::class, $name);
        $container->registerAliasForArgument($serviceId, StoreInterface::class, $type.'_'.$name);
    }
}

if ('supabase' === $type) {
foreach ($stores as $name => $store) {
$arguments = [
Expand Down
Loading