Skip to content

Commit 1cdf1ef

Browse files
committed
fix(search): Cut off result set size
1 parent fd5a3c7 commit 1cdf1ef

File tree

4 files changed

+40
-19
lines changed

4 files changed

+40
-19
lines changed

src/core/search/index_result.h

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ class IndexResult {
3939

4040
BorrowedView Borrowed() const;
4141

42-
// Move out of owned or copy borrowed
43-
DocVec Take();
42+
// Move out of owned or copy borrowed. Take up to `limit` entries and return original size.
43+
std::pair<DocVec, size_t /* full size */> Take(size_t limit = std::numeric_limits<size_t>::max());
4444

4545
private:
4646
bool IsOwned() const;
@@ -82,18 +82,34 @@ inline IndexResult::BorrowedView IndexResult::Borrowed() const {
8282
return std::visit(cb, value_);
8383
}
8484

85-
inline IndexResult::DocVec IndexResult::Take() {
85+
inline std::pair<IndexResult::DocVec, size_t> IndexResult::Take(size_t limit) {
8686
if (IsOwned()) {
87-
return std::move(std::get<DocVec>(value_));
87+
auto& vec = std::get<DocVec>(value_);
88+
size_t size = vec.size();
89+
return {std::move(vec), size};
8890
}
8991

90-
auto cb = [](auto* set) -> DocVec {
92+
// Numeric ranges need to be filtered and don't know their size ahead
93+
if (std::holds_alternative<RangeResult>(value_)) {
94+
auto cb = [limit](auto* range) -> std::pair<DocVec, size_t> {
95+
DocVec out;
96+
out.reserve(range->size());
97+
for (auto it = range->begin(); it != range->end(); ++it)
98+
out.push_back(*it);
99+
size_t total = out.size();
100+
out.resize(std::min(out.size(), limit));
101+
return {std::move(out), total};
102+
};
103+
return std::visit(cb, Borrowed());
104+
}
105+
106+
// Generic borrowed results sets don't need to be filtered, so we can tell the result size ahead
107+
auto cb = [limit](auto* set) -> std::pair<DocVec, size_t> {
91108
DocVec out;
92109
out.reserve(set->size());
93-
for (auto it = set->begin(); it != set->end(); ++it) {
110+
for (auto it = set->begin(); it != set->end() && out.size() < limit; ++it)
94111
out.push_back(*it);
95-
}
96-
return out;
112+
return {std::move(out), set->size()};
97113
};
98114
return std::visit(cb, Borrowed());
99115
}

src/core/search/search.cc

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ struct BasicSearch {
289289

290290
// negate -(*subquery*): explicitly compute result complement. Needs further optimizations
291291
IndexResult Search(const AstNegateNode& node, string_view active_field) {
292-
vector<DocId> matched = SearchGeneric(*node.node, active_field).Take();
292+
auto matched = SearchGeneric(*node.node, active_field).Take().first;
293293
vector<DocId> all = indices_->GetAllDocs();
294294

295295
// To negate a result, we have to find the complement of matched to all documents,
@@ -358,7 +358,7 @@ struct BasicSearch {
358358
knn_distances_ = vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime);
359359
else
360360
knn_distances_ =
361-
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take());
361+
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take().first);
362362
}
363363

364364
// [KNN limit @field vec]: Compute distance from `vec` to all vectors keep closest `limit`
@@ -405,7 +405,6 @@ struct BasicSearch {
405405

406406
// Top level results don't need to be sorted, because they will be scored, sorted by fields or
407407
// used by knn
408-
409408
DCHECK(top_level || holds_alternative<AstKnnNode>(node.Variant()) ||
410409
holds_alternative<AstGeoNode>(node.Variant()) ||
411410
visit([](auto* set) { return is_sorted(set->begin(), set->end()); }, result.Borrowed()));
@@ -416,16 +415,15 @@ struct BasicSearch {
416415
return result;
417416
}
418417

419-
SearchResult Search(const AstNode& query) {
418+
SearchResult Search(const AstNode& query, size_t cuttoff_limit) {
420419
IndexResult result = SearchGeneric(query, "", true);
421420

422421
// Extract profile if enabled
423422
optional<AlgorithmProfile> profile =
424423
profile_builder_ ? make_optional(profile_builder_->Take()) : nullopt;
425424

426-
auto out = result.Take();
427-
const size_t total = out.size();
428-
return SearchResult{total, std::move(out), std::move(knn_scores_), std::move(profile),
425+
auto [out, total_size] = result.Take(cuttoff_limit);
426+
return SearchResult{total_size, std::move(out), std::move(knn_scores_), std::move(profile),
429427
std::move(error_)};
430428
}
431429

@@ -654,11 +652,11 @@ bool SearchAlgorithm::Init(string_view query, const QueryParams* params,
654652
return true;
655653
}
656654

657-
SearchResult SearchAlgorithm::Search(const FieldIndices* index) const {
655+
SearchResult SearchAlgorithm::Search(const FieldIndices* index, size_t cuttoff_limit) const {
658656
auto bs = BasicSearch{index};
659657
if (profiling_enabled_)
660658
bs.EnableProfiling();
661-
return bs.Search(*query_);
659+
return bs.Search(*query_, cuttoff_limit);
662660
}
663661

664662
optional<KnnScoreSortOption> SearchAlgorithm::GetKnnScoreSortOption() const {

src/core/search/search.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,9 @@ class SearchAlgorithm {
197197
bool Init(std::string_view query, const QueryParams* params,
198198
const OptionalFilters* filters = nullptr);
199199

200-
SearchResult Search(const FieldIndices* index) const;
200+
// Search on given index with predefined limit for cutting off result ids
201+
SearchResult Search(const FieldIndices* index,
202+
size_t cuttoff_limit = std::numeric_limits<size_t>::max()) const;
201203

202204
// if enabled, return limit & alias for knn query
203205
std::optional<KnnScoreSortOption> GetKnnScoreSortOption() const;

src/server/search/doc_index.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,12 @@ vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(vector<DocId>* ids,
408408
SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
409409
search::SearchAlgorithm* search_algo) const {
410410
size_t limit = params.limit_offset + params.limit_total;
411-
auto result = search_algo->Search(&*indices_);
411+
412+
// If we don't sort the documents, we don't need to copy more ids than are requested
413+
bool can_cut = !params.sort_option && !search_algo->GetKnnScoreSortOption();
414+
size_t id_cutoff_limit = can_cut ? limit : numeric_limits<size_t>::max();
415+
416+
auto result = search_algo->Search(&*indices_, id_cutoff_limit);
412417
if (!result.error.empty())
413418
return {facade::ErrorReply(std::move(result.error))};
414419

0 commit comments

Comments
 (0)