Skip to content

Commit d8e8215

Browse files
authored
Search: improve queries (#12569)
- Set some values for fuzziness. Search terms like `webdav~` cause ES to time out; with these settings the query now resolves in 400-800ms. Not great, but at least it doesn't kill ES. - Enforce a limit of 3 results for inner hits (blocks). - Move the filter query so it's at the top (according to ChatGPT this is an improvement). I tested several queries on ES, and all of them take around 200ms, even the simplest one... Sometimes some queries do take more time (randomly). My guess is that some other slow queries add some overhead to the server, so all queries get affected. Other theories are the index size, and using the wrong tokenizer/analyzer for the type of queries users want.
1 parent 4ec5cd5 commit d8e8215

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

readthedocs/search/faceted_search.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,15 @@ def _get_text_queries(self, *, query, fields):
113113
is_advanced_query = self.use_advanced_query or self._is_advanced_query(query)
114114
for operator in self.operators:
115115
if is_advanced_query:
116+
# See all valid options at:
117+
# https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-simple-query-string-query.
116118
query_string = SimpleQueryString(
117119
query=query,
118120
fields=fields,
119121
default_operator=operator,
122+
# Restrict fuzziness to avoid timeouts with complex queries.
123+
fuzzy_prefix_length=1,
124+
fuzzy_max_expansions=15,
120125
)
121126
else:
122127
query_string = self._get_fuzzy_query(
@@ -283,13 +288,13 @@ def _get_projects_query(self):
283288

284289
if isinstance(self.projects, dict):
285290
versions_query = [
286-
Bool(filter=[Term(project=project), Term(version=version)])
291+
Bool(must=[Term(project=project), Term(version=version)])
287292
for project, version in self.projects.items()
288293
]
289294
return Bool(should=versions_query)
290295

291296
if isinstance(self.projects, list):
292-
return Bool(filter=Terms(project=self.projects))
297+
return Terms(project=self.projects)
293298

294299
raise ValueError("projects must be a list or a dict!")
295300

@@ -312,13 +317,14 @@ def query(self, search, query):
312317
query=query,
313318
path="sections",
314319
fields=self._section_fields,
320+
limit=3,
315321
)
316322
queries.append(sections_nested_query)
317323
bool_query = Bool(should=queries)
318324

319325
projects_query = self._get_projects_query()
320326
if projects_query:
321-
bool_query = Bool(must=[bool_query, projects_query])
327+
bool_query = Bool(must=[bool_query], filter=projects_query)
322328

323329
final_query = FunctionScore(
324330
query=bool_query,
@@ -327,7 +333,7 @@ def query(self, search, query):
327333
search = search.query(final_query)
328334
return search
329335

330-
def _get_nested_query(self, *, query, path, fields):
336+
def _get_nested_query(self, *, query, path, fields, limit=3):
331337
"""Generate a nested query with passed parameters."""
332338
queries = self._get_queries(
333339
query=query,
@@ -348,7 +354,7 @@ def _get_nested_query(self, *, query, path, fields):
348354

349355
return Nested(
350356
path=path,
351-
inner_hits={"highlight": highlight},
357+
inner_hits={"highlight": highlight, "size": limit},
352358
query=bool_query,
353359
)
354360

0 commit comments

Comments
 (0)