Skip to content

Commit d7446d5

Browse files
committed
docs(examples): enhance postgres-hybrid example with new features
- Demonstrate Bm25TextSearchStrategy vs native PostgreSQL FTS (PostgresTextSearchStrategy)
- Show explicit ReciprocalRankFusion configuration
- Add comparison between both text search strategies
- Simplify summary and improve clarity
1 parent a86b753 commit d7446d5

File tree

6 files changed

+122
-89
lines changed

6 files changed

+122
-89
lines changed

examples/rag/postgres-hybrid.php

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
use Symfony\AI\Fixtures\Movies;
1515
use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory;
1616
use Symfony\AI\Store\Bridge\Postgres\HybridStore;
17+
use Symfony\AI\Store\Bridge\Postgres\ReciprocalRankFusion;
18+
use Symfony\AI\Store\Bridge\Postgres\TextSearch\Bm25TextSearchStrategy;
19+
use Symfony\AI\Store\Bridge\Postgres\TextSearch\PostgresTextSearchStrategy;
1720
use Symfony\AI\Store\Document\Loader\InMemoryLoader;
1821
use Symfony\AI\Store\Document\Metadata;
1922
use Symfony\AI\Store\Document\TextDocument;
@@ -25,21 +28,26 @@
2528
require_once dirname(__DIR__).'/bootstrap.php';
2629

2730
echo "=== PostgreSQL Hybrid Search Demo ===\n\n";
28-
echo "This example demonstrates how to configure the semantic ratio to balance\n";
29-
echo "between semantic (vector) search and PostgreSQL Full-Text Search.\n\n";
31+
echo "Demonstrates HybridStore with configurable search strategies:\n";
32+
echo "- Native PostgreSQL FTS vs BM25\n";
33+
echo "- Semantic ratio adjustment\n";
34+
echo "- Custom RRF scoring\n\n";
3035

31-
// Initialize the hybrid store with balanced search (50/50)
3236
$connection = DriverManager::getConnection((new DsnParser())->parse(env('POSTGRES_URI')));
3337
$pdo = $connection->getNativeConnection();
3438

3539
if (!$pdo instanceof PDO) {
3640
throw new RuntimeException('Unable to get native PDO connection from Doctrine DBAL.');
3741
}
3842

43+
echo "=== Using BM25 Text Search Strategy ===\n\n";
44+
3945
$store = new HybridStore(
4046
connection: $pdo,
4147
tableName: 'hybrid_movies',
42-
semanticRatio: 0.5, // Balanced hybrid search by default
48+
textSearchStrategy: new Bm25TextSearchStrategy('en'),
49+
rrf: new ReciprocalRankFusion(k: 60, normalizeScores: true),
50+
semanticRatio: 0.5,
4351
);
4452

4553
// Create embeddings and documents
@@ -119,9 +127,42 @@
119127
// Cleanup
120128
$store->drop();
121129

122-
echo "=== Summary ===\n";
123-
echo "- semanticRatio = 0.0: Best for exact keyword matches (PostgreSQL FTS)\n";
124-
echo "- semanticRatio = 0.5: Balanced approach using RRF (Reciprocal Rank Fusion)\n";
125-
echo "- semanticRatio = 1.0: Best for conceptual similarity searches (pgvector)\n";
126-
echo "\nYou can set the default ratio when instantiating the HybridStore,\n";
127-
echo "and override it per query using the 'semanticRatio' option.\n";
130+
echo "=== Comparing with Native PostgreSQL FTS ===\n\n";
131+
132+
$storeFts = new HybridStore(
133+
connection: $pdo,
134+
tableName: 'hybrid_movies_fts',
135+
textSearchStrategy: new PostgresTextSearchStrategy(),
136+
semanticRatio: 0.5,
137+
);
138+
139+
$storeFts->setup();
140+
$indexer = new Indexer(new InMemoryLoader($documents), $vectorizer, $storeFts, logger: logger());
141+
$indexer->index($documents);
142+
143+
$resultsFts = $storeFts->query($queryEmbedding, [
144+
'semanticRatio' => 0.5,
145+
'q' => 'technology',
146+
'limit' => 3,
147+
]);
148+
149+
echo "Top 3 results (Native FTS):\n";
150+
foreach ($resultsFts as $i => $result) {
151+
$metadata = $result->metadata->getArrayCopy();
152+
echo sprintf(
153+
" %d. %s (Score: %.4f)\n",
154+
$i + 1,
155+
$metadata['title'] ?? 'Unknown',
156+
$result->score ?? 0.0
157+
);
158+
}
159+
160+
$storeFts->drop();
161+
162+
echo "\n=== Summary ===\n";
163+
echo "- semanticRatio = 0.0: Pure keyword matching\n";
164+
echo "- semanticRatio = 0.5: Balanced hybrid (RRF)\n";
165+
echo "- semanticRatio = 1.0: Pure semantic search\n";
166+
echo "\nText Search Strategies:\n";
167+
echo "- PostgresTextSearchStrategy: Native FTS (ts_rank_cd)\n";
168+
echo "- Bm25TextSearchStrategy: BM25 ranking (requires pg_bm25 extension)\n";

src/store/src/Bridge/Postgres/HybridStore.php

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,20 @@ final class HybridStore implements ManagedStoreInterface, StoreInterface
4141
private readonly TextSearchStrategyInterface $textSearchStrategy;
4242

4343
/**
44-
* @param string $vectorFieldName Name of the vector field
45-
* @param string $contentFieldName Name of the text field for FTS
46-
* @param float $semanticRatio Ratio between semantic and keyword search (0.0 to 1.0)
47-
* @param Distance $distance Distance metric for vector similarity
48-
* @param string $language PostgreSQL text search configuration
49-
* @param TextSearchStrategyInterface|null $textSearchStrategy Text search strategy (defaults to native PostgreSQL)
50-
* @param ReciprocalRankFusion|null $rrf RRF calculator (defaults to k=60, normalized)
51-
* @param float|null $defaultMaxScore Default max distance for vector search
52-
* @param float|null $defaultMinScore Default min RRF score threshold
53-
* @param float $fuzzyPrimaryThreshold Primary threshold for fuzzy matching
54-
* @param float $fuzzySecondaryThreshold Secondary threshold for fuzzy matching
55-
* @param float $fuzzyStrictThreshold Strict threshold for double validation
56-
* @param float $fuzzyWeight Weight of fuzzy matching (0.0 to 1.0)
57-
* @param array<string, array{metadata_key: string, boost?: float}> $searchableAttributes Searchable attributes with boosting config
44+
* @param string $vectorFieldName Name of the vector field
45+
* @param string $contentFieldName Name of the text field for FTS
46+
* @param float $semanticRatio Ratio between semantic and keyword search (0.0 to 1.0)
47+
* @param Distance $distance Distance metric for vector similarity
48+
* @param string $language PostgreSQL text search configuration
49+
* @param TextSearchStrategyInterface|null $textSearchStrategy Text search strategy (defaults to native PostgreSQL)
50+
* @param ReciprocalRankFusion|null $rrf RRF calculator (defaults to k=60, normalized)
51+
* @param float|null $defaultMaxScore Default max distance for vector search
52+
* @param float|null $defaultMinScore Default min RRF score threshold
53+
* @param float $fuzzyPrimaryThreshold Primary threshold for fuzzy matching
54+
* @param float $fuzzySecondaryThreshold Secondary threshold for fuzzy matching
55+
* @param float $fuzzyStrictThreshold Strict threshold for double validation
56+
* @param float $fuzzyWeight Weight of fuzzy matching (0.0 to 1.0)
57+
* @param array<string, array{metadata_key: string, boost?: float}> $searchableAttributes Searchable attributes with boosting config
5858
*/
5959
public function __construct(
6060
private readonly \PDO $connection,
@@ -75,17 +75,11 @@ public function __construct(
7575
private readonly array $searchableAttributes = [],
7676
) {
7777
if ($semanticRatio < 0.0 || $semanticRatio > 1.0) {
78-
throw new InvalidArgumentException(\sprintf(
79-
'The semantic ratio must be between 0.0 and 1.0, "%s" given.',
80-
$semanticRatio
81-
));
78+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $semanticRatio));
8279
}
8380

8481
if ($fuzzyWeight < 0.0 || $fuzzyWeight > 1.0) {
85-
throw new InvalidArgumentException(\sprintf(
86-
'The fuzzy weight must be between 0.0 and 1.0, "%s" given.',
87-
$fuzzyWeight
88-
));
82+
throw new InvalidArgumentException(\sprintf('The fuzzy weight must be between 0.0 and 1.0, "%s" given.', $fuzzyWeight));
8983
}
9084

9185
$this->textSearchStrategy = $textSearchStrategy ?? new PostgresTextSearchStrategy();
@@ -314,11 +308,11 @@ private function createSearchTextTrigger(): void
314308

315309
$this->connection->exec(
316310
\sprintf(
317-
"DROP TRIGGER IF EXISTS trigger_update_search_text ON %s;
311+
'DROP TRIGGER IF EXISTS trigger_update_search_text ON %s;
318312
CREATE TRIGGER trigger_update_search_text
319313
BEFORE INSERT OR UPDATE ON %s
320314
FOR EACH ROW
321-
EXECUTE FUNCTION update_search_text();",
315+
EXECUTE FUNCTION update_search_text();',
322316
$this->tableName,
323317
$this->tableName,
324318
),
@@ -353,10 +347,7 @@ private function createTsvectorIndexes(): void
353347
private function validateSemanticRatio(float $ratio): float
354348
{
355349
if ($ratio < 0.0 || $ratio > 1.0) {
356-
throw new InvalidArgumentException(\sprintf(
357-
'The semantic ratio must be between 0.0 and 1.0, "%s" given.',
358-
$ratio
359-
));
350+
throw new InvalidArgumentException(\sprintf('The semantic ratio must be between 0.0 and 1.0, "%s" given.', $ratio));
360351
}
361352

362353
return $ratio;

src/store/src/Bridge/Postgres/ReciprocalRankFusion.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ public function __construct(
3636
/**
3737
* Calculate RRF score for a single result with multiple rankings.
3838
*
39-
* @param array<string, array{rank: int, score: float, weight: float}> $rankings
40-
* Each entry contains: rank (1-based), score (normalized 0-1), weight (0-1)
39+
* @param array<string, array{rank: int|null, score: float, weight: float}> $rankings
40+
* Each entry contains: rank (1-based or null), score (normalized 0-1), weight (0-1)
4141
*
4242
* @return float The combined RRF score
4343
*/
@@ -142,7 +142,7 @@ public function buildCombinedSqlExpression(array $sources): string
142142
);
143143
}
144144

145-
return '(' . implode(' + ', $expressions) . ')';
145+
return '('.implode(' + ', $expressions).')';
146146
}
147147

148148
public function getK(): int

src/store/src/Bridge/Postgres/TextSearch/Bm25TextSearchStrategy.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public function isAvailable(\PDO $connection): bool
132132
"SELECT 1 FROM pg_proc WHERE proname = 'bm25topk' LIMIT 1"
133133
);
134134

135-
return $stmt->fetchColumn() !== false;
135+
return false !== $stmt->fetchColumn();
136136
} catch (\PDOException) {
137137
return false;
138138
}

0 commit comments

Comments
 (0)