Skip to content

Commit 986054f

Browse files
feat: delete all documents for AzureAISearch DocumentStore (#2404)
Co-authored-by: Julian Risch <[email protected]>
1 parent 5641b7c commit 986054f

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,44 @@ def delete_documents(self, document_ids: List[str]) -> None:
385385
if documents:
386386
self.client.delete_documents(documents)
387387

388+
def delete_all_documents(self, recreate_index: bool = False) -> None:  # noqa: FBT002, FBT001
    """
    Deletes all documents in the document store.

    When the index is kept, document keys are fetched with a wildcard search and removed in batches. The
    search/delete pass is repeated while a pass comes back full, so indexes larger than one query can page
    through are still emptied.

    :param recreate_index: If True, the index will be deleted and recreated with the original schema.
        If False, all documents will be deleted while preserving the index.
    :raises HttpResponseError: If any step fails (including a missing index client); the original
        exception is chained as the cause.
    """
    # Service limits (see Azure AI Search "service limits"): a query cannot page past 100,000 results,
    # and a single indexing request may carry at most 1,000 actions.
    max_results_per_query = 100_000
    max_actions_per_batch = 1_000

    try:
        if recreate_index:
            # Get current index definition
            if self._index_client is None:
                msg = "Index client is not initialized"
                raise ValueError(msg)
            current_index = self._index_client.get_index(self._index_name)

            # Delete and recreate index
            self._index_client.delete_index(self._index_name)
            self._index_client.create_index(current_index)
            logger.info("Index '{idx_name}' recreated with original schema.", idx_name=self._index_name)
            return

        # Delete all documents without recreating index
        if self.count_documents() == 0:
            return

        seen_ids = set()
        while True:
            # Search for documents (pagination within one query is handled by the Azure SDK)
            fetched = 0
            pending = []
            for hit in self.client.search(search_text="*", select=["id"], top=max_results_per_query):
                fetched += 1
                doc_id = hit["id"]
                if doc_id not in seen_ids:
                    seen_ids.add(doc_id)
                    # Only the key is needed for a delete action; drop the "@search.*" result metadata.
                    pending.append({"id": doc_id})

            for start in range(0, len(pending), max_actions_per_batch):
                self.client.delete_documents(pending[start : start + max_actions_per_batch])

            if fetched < max_results_per_query:
                # The query was not truncated, so every document it could see has been handled.
                break
            if not pending:
                # A full pass with nothing new means the search view has not caught up with the deletes
                # yet; stop instead of re-issuing the same query in a tight loop.
                logger.warning(
                    "Search results for index '{idx_name}' did not advance after deleting {n_docs} documents; "
                    "some documents may remain.",
                    idx_name=self._index_name,
                    n_docs=len(seen_ids),
                )
                break

        if seen_ids:
            logger.info(
                "Deleted {n_docs} documents from index '{idx_name}'.",
                n_docs=len(seen_ids),
                idx_name=self._index_name,
            )
    except Exception as e:
        msg = f"Failed to delete all documents from Azure AI Search: {e!s}"
        raise HttpResponseError(msg) from e
425+
388426
def get_documents_by_id(self, document_ids: List[str]) -> List[Document]:
    """
    Fetches the raw search results for the given ids and converts them to Haystack documents.

    :param document_ids: Ids of the documents to retrieve.
    :returns: The converted documents.
    """
    raw_results = self._get_raw_documents_by_id(document_ids)
    return self._convert_search_result_to_documents(raw_results)
390428

integrations/azure_ai_search/tests/test_document_store.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,19 @@ def test_write_documents_duplicate_fail(self, document_store: AzureAISearchDocum
291291
@pytest.mark.skip(reason="Azure AI search index overwrites duplicate documents by default")
292292
def test_write_documents_duplicate_skip(self, document_store: AzureAISearchDocumentStore): ...
293293

294+
def test_delete_all_documents(self, document_store: AzureAISearchDocumentStore):
    """Two written documents are all gone after `delete_all_documents` with default arguments."""
    document_store.write_documents([Document(content="first doc"), Document(content="second doc")])
    count_before = document_store.count_documents()
    assert count_before == 2

    document_store.delete_all_documents()
    count_after = document_store.count_documents()
    assert count_after == 0
301+
302+
def test_delete_all_documents_empty_index(self, document_store: AzureAISearchDocumentStore):
    """Calling `delete_all_documents` on an empty store does not raise and leaves it empty."""
    initial_count = document_store.count_documents()
    assert initial_count == 0

    document_store.delete_all_documents()

    final_count = document_store.count_documents()
    assert final_count == 0
306+
294307

295308
def _random_embeddings(n):
    """Return a list of `n` pseudo-random floats, each rounded to 7 decimal places."""
    decimals = 7
    return [round(random.random(), decimals) for _unused in range(n)]  # nosec: S311

0 commit comments

Comments
 (0)