Skip to content

Commit 45e40dd

Browse files
Fix documentdb text (#1263)
* Update helm. * Fix DocumentDB search. * Fix tests * Fix compare. * Revert settings. * Fixes
1 parent 65963ed commit 45e40dd

File tree

23 files changed

+371
-54
lines changed

23 files changed

+371
-54
lines changed

backend/extensions/Squidex.Extensions/Actions/Algolia/AlgoliaFlowStep.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public override ValueTask PrepareAsync(FlowExecutionContext executionContext,
6868
{
6969
var @event = ((FlowEventContext)executionContext.Context).Event;
7070

71-
if (!@event.ShouldDelete(executionContext, Delete))
71+
if (@event.ShouldDelete(executionContext, Delete))
7272
{
7373
Document = null;
7474
return default;

backend/extensions/Squidex.Extensions/Actions/ElasticSearch/ElasticSearchFlowStep.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public override ValueTask PrepareAsync(FlowExecutionContext executionContext,
7474
{
7575
var @event = ((FlowEventContext)executionContext.Context).Event;
7676

77-
if (!@event.ShouldDelete(executionContext, Delete))
77+
if (@event.ShouldDelete(executionContext, Delete))
7878
{
7979
Document = null;
8080
return default;

backend/extensions/Squidex.Extensions/Actions/OpenSearch/OpenSearchFlowStep.cs

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -76,30 +76,28 @@ public override ValueTask PrepareAsync(FlowExecutionContext executionContext,
7676

7777
if (@event.ShouldDelete(executionContext, Delete))
7878
{
79-
OpenSearchContent content;
80-
try
81-
{
82-
content = executionContext.DeserializeJson<OpenSearchContent>(Document!);
83-
}
84-
catch (Exception ex)
85-
{
86-
content = new OpenSearchContent
87-
{
88-
More = new Dictionary<string, object>
89-
{
90-
["error"] = $"Invalid JSON: {ex.Message}",
91-
},
92-
};
93-
}
79+
Document = null;
80+
return default;
81+
}
9482

95-
Document = executionContext.SerializeJson(content);
83+
OpenSearchContent content;
84+
try
85+
{
86+
content = executionContext.DeserializeJson<OpenSearchContent>(Document!);
9687
}
97-
else
88+
catch (Exception ex)
9889
{
99-
Document = null;
90+
content = new OpenSearchContent
91+
{
92+
More = new Dictionary<string, object>
93+
{
94+
["error"] = $"Invalid JSON: {ex.Message}",
95+
},
96+
};
10097
}
10198

102-
return base.PrepareAsync(executionContext, ct);
99+
Document = executionContext.SerializeJson(content);
100+
return default;
103101
}
104102

105103
public override async ValueTask<FlowStepResult> ExecuteAsync(FlowExecutionContext executionContext,

backend/extensions/Squidex.Extensions/Actions/Typesense/TypesenseFlowStep.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public override ValueTask PrepareAsync(FlowExecutionContext executionContext,
5959
{
6060
var @event = ((FlowEventContext)executionContext.Context).Event;
6161

62-
if (!@event.ShouldDelete(executionContext, Delete))
62+
if (@event.ShouldDelete(executionContext, Delete))
6363
{
6464
Document = null;
6565
return default;
@@ -84,8 +84,7 @@ public override ValueTask PrepareAsync(FlowExecutionContext executionContext,
8484
}
8585

8686
Document = executionContext.SerializeJson(content);
87-
88-
return base.PrepareAsync(executionContext, ct);
87+
return default;
8988
}
9089

9190
public override async ValueTask<FlowStepResult> ExecuteAsync(FlowExecutionContext executionContext,

backend/src/Squidex.Data.MongoDb/Domain/Apps/Entities/Contents/CollectionProvider.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ await schemaCollection.Indexes.CreateManyAsync(
4141
.Ascending(x => x.IndexedSchemaId)
4242
.Ascending(x => x.IsDeleted)
4343
.Descending(x => x.LastModified)),
44-
]);
44+
]);
4545

4646
return schemaCollection;
4747
}

backend/src/Squidex.Data.MongoDb/Domain/Apps/Entities/Contents/Text/AtlasTextIndex.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
namespace Squidex.Domain.Apps.Entities.Contents.Text;
1919

20-
public sealed class AtlasTextIndex(IMongoDatabase database, IHttpClientFactory atlasClient, IOptions<AtlasOptions> atlasOptions, string shardKey) : MongoTextIndexBase<Dictionary<string, string>>(database, shardKey, new CommandFactory<Dictionary<string, string>>(BuildTexts))
20+
public sealed class AtlasTextIndex(IMongoDatabase database, IHttpClientFactory atlasClient, IOptions<AtlasOptions> atlasOptions, string shardKey)
21+
: MongoTextIndexBase<Dictionary<string, string>>(database, shardKey, new CommandFactory<Dictionary<string, string>>(BuildTexts))
2122
{
2223
private static readonly LuceneQueryVisitor QueryVisitor = new LuceneQueryVisitor(AtlasIndexDefinition.GetFieldPath);
2324
private static readonly LuceneQueryAnalyzer QueryParser =
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
// ==========================================================================
2+
// Squidex Headless CMS
3+
// ==========================================================================
4+
// Copyright (c) Squidex UG (haftungsbeschraenkt)
5+
// All rights reserved. Licensed under the MIT license.
6+
// ==========================================================================
7+
8+
using MongoDB.Driver;
9+
using MongoDB.Driver.GeoJsonObjectModel;
10+
using Squidex.Domain.Apps.Core.Apps;
11+
using Squidex.Infrastructure;
12+
using Squidex.Infrastructure.ObjectPool;
13+
using Squidex.Infrastructure.Translations;
14+
15+
namespace Squidex.Domain.Apps.Entities.Contents.Text;
16+
17+
/// <summary>
/// Text and geo index that stores the texts of a content item as one pre-tokenized string
/// (see <see cref="BuildTexts"/>) and queries it through a MongoDB text index.
/// </summary>
/// <remarks>
/// NOTE(review): judging by the name this variant targets DocumentDB-compatible servers; confirm
/// which server features (text index, 2dsphere index) are actually available there.
/// </remarks>
public sealed class DocumentDbTextIndex(IMongoDatabase database, string shardKey)
    : MongoTextIndexBase<string>(database, shardKey, new CommandFactory<string>(BuildTexts))
{
    /// <summary>
    /// State shared by the queries of a single text search. The struct is copied when passed to
    /// the helpers, but <see cref="Results"/> is a reference and therefore accumulates across calls.
    /// </summary>
    private record struct SearchOperation
    {
        required public App App { get; init; }

        required public List<(DomainId Id, double Score)> Results { get; init; }

        required public string SearchTerms { get; init; }

        // Mutable because the preferred-schema search halves the page size per query.
        required public int Take { get; set; }

        required public SearchScope SearchScope { get; init; }
    }

    /// <summary>
    /// Creates the text index over the stored texts plus the lookup and geo indexes.
    /// </summary>
    protected override async Task SetupCollectionAsync(IMongoCollection<MongoTextIndexEntity<string>> collection,
        CancellationToken ct)
    {
        await collection.Indexes.CreateOneAsync(
            new CreateIndexModel<MongoTextIndexEntity<string>>(
                Index.Text(x => x.Texts)),
            cancellationToken: ct);

        await collection.Indexes.CreateManyAsync(
        [
            new CreateIndexModel<MongoTextIndexEntity<string>>(
                Index
                    .Ascending(x => x.AppId)
                    .Ascending(x => x.ContentId)),

            new CreateIndexModel<MongoTextIndexEntity<string>>(
                Index
                    .Ascending(x => x.AppId)
                    .Ascending(x => x.SchemaId)
                    .Ascending(x => x.GeoField)
                    .Geo2DSphere(x => x.GeoObject)),
        ], ct);
    }

    /// <summary>
    /// Finds the ids of contents whose geo object for <c>query.Field</c> lies near the queried point.
    /// </summary>
    /// <param name="app">The app to search in.</param>
    /// <param name="query">Schema, field, coordinates, radius and page size of the geo query.</param>
    /// <param name="scope">The scope that selects the collection and the scope filter.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>The matching content ids, at most <c>query.Take</c> of them.</returns>
    public override async Task<List<DomainId>?> SearchAsync(App app, GeoQuery query, SearchScope scope,
        CancellationToken ct = default)
    {
        Guard.NotNull(app);
        Guard.NotNull(query);

        var point = new GeoJsonPoint<GeoJson2DCoordinates>(new GeoJson2DCoordinates(query.Longitude, query.Latitude));

        // Use the filter in the correct order to leverage the index in the best way.
        var findFilter =
            Filter.And(
                Filter.Eq(x => x.AppId, app.Id),
                Filter.Eq(x => x.SchemaId, query.SchemaId),
                Filter.Eq(x => x.GeoField, query.Field),
                Filter.NearSphere(x => x.GeoObject, point, query.Radius),
                FilterByScope(scope));

        var byGeo =
            await GetCollection(scope).Find(findFilter).Limit(query.Take)
                .Project<MongoTextResult>(Projection.Include(x => x.ContentId))
                .ToListAsync(ct);

        return byGeo.Select(x => x.ContentId).ToList();
    }

    /// <summary>
    /// Runs a full text search and returns the distinct content ids ordered by descending score.
    /// </summary>
    /// <param name="app">The app to search in.</param>
    /// <param name="query">Search text, page size and the optional required or preferred schemas.</param>
    /// <param name="scope">The scope that selects the collection and the scope filter.</param>
    /// <param name="ct">The cancellation token.</param>
    /// <returns>
    /// <c>null</c> when the search text is empty or whitespace, otherwise the matching content ids.
    /// </returns>
    public override async Task<List<DomainId>?> SearchAsync(App app, TextQuery query, SearchScope scope,
        CancellationToken ct = default)
    {
        Guard.NotNull(app);
        Guard.NotNull(query);

        if (string.IsNullOrWhiteSpace(query.Text))
        {
            return null;
        }

        // Use a custom tokenizer to leverage stop words from multiple languages.
        var search = new SearchOperation
        {
            App = app,
            SearchTerms = Tokenizer.Query(query.Text),
            SearchScope = scope,
            Results = [],
            Take = query.Take,
        };

        if (query.RequiredSchemaIds?.Count > 0)
        {
            await SearchBySchemaAsync(search, query.RequiredSchemaIds, 1, ct);
        }
        else if (query.PreferredSchemaId == null)
        {
            await SearchByAppAsync(search, 1, ct);
        }
        else
        {
            // We cannot write queries that prefer results from the same schema, therefore make two queries.
            var halved = search.Take / 2;

            // MongoDB treats a limit of zero as "no limit". Halving a page size of one must therefore
            // not end up at zero, otherwise both queries would return every match.
            if (halved == 0 && search.Take > 0)
            {
                halved = 1;
            }

            search.Take = halved;

            // Boost the score of the results from the preferred schema by 10 percent.
            await SearchBySchemaAsync(search, Enumerable.Repeat(query.PreferredSchemaId.Value, 1), 1.1, ct);
            await SearchByAppAsync(search, 1, ct);
        }

        // Ordering first ensures that Distinct keeps the best scored entry of a content item.
        var ordered = search.Results.OrderByDescending(x => x.Score).Select(x => x.Id).Distinct();

        // Only relevant when the per-query limit was raised above: never return more than requested.
        // A non-positive page size is passed through untouched, as before.
        var limited = query.Take > 0 ? ordered.Take(query.Take) : ordered;

        return limited.ToList();
    }

    /// <summary>
    /// Searches the texts of the app restricted to the given schemas.
    /// </summary>
    private Task SearchBySchemaAsync(SearchOperation search, IEnumerable<DomainId> schemaIds, double factor,
        CancellationToken ct)
    {
        var filter =
            Filter.And(
                Filter.Eq(x => x.AppId, search.App.Id),
                Filter.Text(search.SearchTerms),
                Filter.In(x => x.SchemaId, schemaIds),
                FilterByScope(search.SearchScope));

        return SearchAsync(search, filter, factor, ct);
    }

    /// <summary>
    /// Searches the texts of the whole app, independent of the schema.
    /// </summary>
    private Task SearchByAppAsync(SearchOperation search, double factor,
        CancellationToken ct)
    {
        var filter =
            Filter.And(
                Filter.Eq(x => x.AppId, search.App.Id),
                Filter.Text(search.SearchTerms),
                FilterByScope(search.SearchScope));

        return SearchAsync(search, filter, factor, ct);
    }

    /// <summary>
    /// Executes the filter sorted by text score, limited to <c>search.Take</c>, and appends each hit
    /// with its score multiplied by <paramref name="factor"/> to <c>search.Results</c>.
    /// </summary>
    private async Task SearchAsync(SearchOperation search, FilterDefinition<MongoTextIndexEntity<string>> filter, double factor,
        CancellationToken ct)
    {
        var byText =
            await GetCollection(search.SearchScope).Find(filter).Limit(search.Take)
                .Project<MongoTextResult>(Projection.Include(x => x.ContentId).MetaTextScore("score")).Sort(Sort.MetaTextScore("score"))
                .ToListAsync(ct);

        search.Results.AddRange(byText.Select(x => (x.ContentId, x.Score * factor)));
    }

    /// <summary>
    /// Builds the single indexed string from all text entries: the terms of every entry are
    /// appended, each preceded by a space.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the dictionary key is passed as second argument to the tokenizer and is
    /// presumably the language of the text; confirm against <c>Tokenizer.Terms</c>.
    /// </remarks>
    private static string BuildTexts(Dictionary<string, string> source)
    {
        var sb = DefaultPools.StringBuilder.Get();
        try
        {
            foreach (var (key, value) in source)
            {
                sb.Append(' ');
                sb.Append(Tokenizer.Terms(value, key));
            }

            return sb.ToString();
        }
        finally
        {
            // Always hand the builder back to the pool, even when tokenizing throws.
            DefaultPools.StringBuilder.Return(sb);
        }
    }
}

backend/src/Squidex.Data.MongoDb/Domain/Apps/Entities/Contents/Text/MongoTextIndex.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111

1212
namespace Squidex.Domain.Apps.Entities.Contents.Text;
1313

14-
public sealed class MongoTextIndex(IMongoDatabase database, string shardKey) : MongoTextIndexBase<List<MongoTextIndexEntityText>>(database, shardKey, new CommandFactory<List<MongoTextIndexEntityText>>(BuildTexts))
14+
public sealed class MongoTextIndex(IMongoDatabase database, string shardKey)
15+
: MongoTextIndexBase<List<MongoTextIndexEntityText>>(database, shardKey, new CommandFactory<List<MongoTextIndexEntityText>>(BuildTexts))
1516
{
1617
private record struct SearchOperation
1718
{
@@ -33,9 +34,7 @@ protected override async Task SetupCollectionAsync(IMongoCollection<MongoTextInd
3334

3435
await collection.Indexes.CreateOneAsync(
3536
new CreateIndexModel<MongoTextIndexEntity<List<MongoTextIndexEntityText>>>(
36-
Index
37-
.Ascending(x => x.AppId)
38-
.Text("t.t")),
37+
Index.Ascending(x => x.AppId).Text("t.t")),
3938
cancellationToken: ct);
4039
}
4140

backend/src/Squidex.Data.MongoDb/Domain/Apps/Entities/Contents/Text/MongoTextIndexBase.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ public async virtual Task ExecuteAsync(IndexCommand[] commands,
114114
catch (MongoBulkWriteException ex)
115115
{
116116
// Ignore invalid geo data when writing content. Our insert is unordered anyway.
117-
if (ex.WriteErrors.Any(error => error.Code != MongoDbErrorCodes.Errror16755_InvalidGeoData))
117+
if (!ex.WriteErrors.All(e =>
118+
MongoDbErrorCodes.IsInvalidGeoData(e) ||
119+
MongoDbErrorCodes.IsInvalidDocumentDbGeoData(e)))
118120
{
119121
throw;
120122
}

backend/src/Squidex.Data.MongoDb/Infrastructure/MongoDbErrorCodes.cs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,19 @@
77

88
#pragma warning disable SA1310 // Field names should not contain underscore
99

10+
using MongoDB.Driver;
11+
1012
namespace Squidex.Infrastructure;
1113

1214
public static class MongoDbErrorCodes
1315
{
14-
public const int Errror16755_InvalidGeoData = 16755;
16+
/// <summary>
/// Checks whether the write error has the code 16755, which is treated as invalid geo data.
/// </summary>
/// <param name="error">The write error to inspect.</param>
/// <returns><c>true</c> when the error code is 16755, otherwise <c>false</c>.</returns>
public static bool IsInvalidGeoData(WriteError error)
    => error.Code is 16755;
20+
21+
/// <summary>
/// Checks whether the write error has the code 2, which is treated as invalid geo data
/// reported by DocumentDB.
/// </summary>
/// <remarks>
/// NOTE(review): code 2 looks like a generic error code rather than a geo specific one, so this
/// check may also match unrelated write failures; confirm against the server documentation.
/// </remarks>
/// <param name="error">The write error to inspect.</param>
/// <returns><c>true</c> when the error code is 2, otherwise <c>false</c>.</returns>
public static bool IsInvalidDocumentDbGeoData(WriteError error)
    => error.Code is 2;
1525
}

0 commit comments

Comments
 (0)