@@ -50,14 +50,31 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon
5050
5151 // Initialize embeddings model and vector database
5252 const embeddings = new AzureOpenAIEmbeddings ( { azureADTokenProvider } ) ;
53- await AzureCosmosDBNoSQLVectorStore . fromDocuments ( documents , embeddings , { credentials } ) ;
53+ const store = await AzureCosmosDBNoSQLVectorStore . fromDocuments ( [ ] , embeddings , { credentials } ) ;
54+
55+ // Remove existing documents with the same filename to avoid duplicates
56+ try {
57+ await store . delete ( {
58+ filter : `SELECT * FROM c WHERE c.metadata.source = "${ filename . replaceAll ( '"' , '\\"' ) } "` ,
59+ } ) ;
60+ } catch ( error : unknown ) {
61+ // If deletion fails (e.g., container doesn't exist yet), just log and continue
62+ context . log ( `Warning: Could not delete existing documents: ${ ( error as Error ) . message } ` ) ;
63+ }
64+
65+ // Add the new documents
66+ await store . addDocuments ( documents ) ;
5467 } else {
5568 // If no environment variables are set, it means we are running locally
5669 context . log ( 'No Azure OpenAI endpoint set, using Ollama models and local DB' ) ;
5770 const embeddings = new OllamaEmbeddings ( { model : ollamaEmbeddingsModel } ) ;
5871 const folderExists = await checkFolderExists ( faissStoreFolder ) ;
5972 if ( folderExists ) {
6073 const store = await FaissStore . load ( faissStoreFolder , embeddings ) ;
74+
75+ // Remove existing documents with the same filename to avoid duplicates
76+ await removeDuplicateDocuments ( store , filename ) ;
77+
6178 await store . addDocuments ( documents ) ;
6279 await store . save ( faissStoreFolder ) ;
6380 } else {
@@ -90,6 +107,25 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon
90107 }
91108}
92109
/**
 * Deletes from a FAISS store every document whose `metadata.source` equals
 * `filename`, so that adding documents for that file again does not create
 * duplicates.
 *
 * Each document ID listed in the store's mapping is looked up in the docstore;
 * IDs whose document matches are collected and removed with a single
 * `store.delete` call. When nothing matches, `store.delete` is not called.
 *
 * @param store - Store whose mapping and docstore are scanned, and from which
 *   the matching documents are deleted.
 * @param filename - Value compared (strict equality) against each document's
 *   `metadata.source`.
 * @returns A promise that resolves once the deletion, if any, has completed.
 */
async function removeDuplicateDocuments(store: FaissStore, filename: string): Promise<void> {
  const docstore = store.getDocstore();

  // Only the mapping's values (document IDs) are needed; its keys are not used here.
  const idsToDelete = Object.values(store.getMapping()).filter((documentId) => {
    const document = docstore.search(documentId);
    // A missing document or missing metadata yields `undefined`, which never
    // equals the (string) filename.
    return document?.metadata?.source === filename;
  });

  // Skip the delete call entirely when no document matched.
  if (idsToDelete.length > 0) {
    await store.delete({ ids: idsToDelete });
  }
}
128+
93129async function checkFolderExists ( folderPath : string ) : Promise < boolean > {
94130 try {
95131 const stats = await fs . stat ( folderPath ) ;
0 commit comments