Skip to content

Commit a8c7bd0

Browse files
patrykkopycinski authored and akowalska622 committed
[Security Assistant] Fix Inference endpoint setup (elastic#219024)
## Summary Fixes an issue introduced in version 8.18.0, where the system switched to using the default inference endpoint and, as part of this change, cleaned up the dedicated inference endpoint. However, `Index entries` in the Knowledge Base might still reference the dedicated endpoint. This PR addresses the problem by deleting the dedicated endpoint only when it is no longer in use, and adds logic to recreate the endpoint if any `Index entries` still point to it. Additionally, a new `responseTimeout` parameter has been added to the plugin configuration. This allows controlling the response timeout in the Security Assistant, which can be useful when running local models. Testing guidelines: 1. Set up version `8.17.5` using [QAF](https://docs.elastic.dev/appex-qa/qaf/getting-started) ```qaf deployments create --stack-version 8.17.5 --no-es-ssl --name upgrade-test-elser --bootstrap-password changeme``` 2. Set up the Knowledge Base at http://localhost:5601/app/management/kibana/securityAiAssistantManagement?tab=knowledge_base 3. Add a custom index with a `semantic_text` field using `elastic-security-ai-assistant-elser2` 4. Add a KB `Index entry` using this index 5. Upgrade to `8.18.0` ```qaf deployments upgrade upgrade-test-elser --stack-version 8.18.0 --full-cluster-restart``` 6. Check if `elastic-security-ai-assistant-elser2` was deleted in http://localhost:5601/app/ml/overview 7. Stop Kibana ```qaf deployments stop-kibana upgrade-test-elser``` 8. Run Kibana on this branch https://github.com/patrykkopycinski/kibana/tree/fix/security-inference-endpoint-8-18 9. Check if `elastic-security-ai-assistant-elser2` was recreated in http://localhost:5601/app/ml/overview
1 parent c576530 commit a8c7bd0

File tree

35 files changed

+615
-459
lines changed

35 files changed

+615
-459
lines changed

x-pack/platform/packages/shared/kbn-elastic-assistant-common/constants.ts

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -125,16 +125,3 @@ export const ATTACK_DISCOVERY_ALERTS_COMMON_INDEX_PREFIX =
125125
*/
126126
export const ATTACK_DISCOVERY_ALERTS_AD_HOC_INDEX_RESOURCE_PREFIX =
127127
`${ATTACK_DISCOVERY_ALERTS_COMMON_INDEX_PREFIX}-ad-hoc` as const;
128-
129-
/**
130-
* The server timeout is set to 4 minutes to allow for long-running requests.
131-
* The allows slower LLMs (like Llama 3.1 70B) and complex tasks such as ESQL generation to complete
132-
* without being interrupted.
133-
*/
134-
export const INVOKE_LLM_SERVER_TIMEOUT = 4 * 60 * 1000; // 4 minutes
135-
/**
136-
* The client timeout is set to 3 seconds less than the server timeout to prevent
137-
* the `core-http-browser` from retrying the request.
138-
*
139-
*/
140-
export const INVOKE_LLM_CLIENT_TIMEOUT = INVOKE_LLM_SERVER_TIMEOUT - 3000; // 4 minutes - 3 second

x-pack/platform/packages/shared/kbn-elastic-assistant/impl/assistant/api/chat_complete/use_chat_complete.ts

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
*/
77

88
import { useCallback, useMemo, useRef, useState } from 'react';
9-
import { INVOKE_LLM_CLIENT_TIMEOUT, PromptIds, Replacements } from '@kbn/elastic-assistant-common';
9+
import { PromptIds, Replacements } from '@kbn/elastic-assistant-common';
1010
import { HttpFetchQuery } from '@kbn/core-http-browser';
1111
import { ChatCompleteResponse, postChatComplete } from './post_chat_complete';
1212
import { useAssistantContext, useLoadConnectors } from '../../../..';
13-
import { FETCH_MESSAGE_TIMEOUT_ERROR } from '../../use_send_message/translations';
1413

1514
interface SendMessageProps {
1615
message: string;
@@ -39,11 +38,6 @@ export const useChatComplete = ({ connectorId }: { connectorId: string }): UseCh
3938
async ({ message, promptIds, replacements, query }: SendMessageProps) => {
4039
setIsLoading(true);
4140

42-
const timeoutId = setTimeout(() => {
43-
abortController.current.abort(FETCH_MESSAGE_TIMEOUT_ERROR);
44-
abortController.current = new AbortController();
45-
}, INVOKE_LLM_CLIENT_TIMEOUT);
46-
4741
try {
4842
return await postChatComplete({
4943
actionTypeId,
@@ -58,7 +52,6 @@ export const useChatComplete = ({ connectorId }: { connectorId: string }): UseCh
5852
traceOptions,
5953
});
6054
} finally {
61-
clearTimeout(timeoutId);
6255
setIsLoading(false);
6356
}
6457
},

x-pack/platform/packages/shared/kbn-elastic-assistant/impl/assistant/use_send_message/index.tsx

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@
77

88
import { HttpSetup } from '@kbn/core-http-browser';
99
import { useCallback, useRef, useState } from 'react';
10-
import { ApiConfig, INVOKE_LLM_CLIENT_TIMEOUT, Replacements } from '@kbn/elastic-assistant-common';
10+
import { ApiConfig, Replacements } from '@kbn/elastic-assistant-common';
1111
import moment from 'moment';
1212
import { useAssistantContext } from '../../assistant_context';
1313
import { fetchConnectorExecuteAction, FetchConnectorExecuteResponse } from '../api';
14-
import * as i18n from './translations';
1514

1615
interface SendMessageProps {
1716
apiConfig: ApiConfig;
@@ -40,11 +39,6 @@ export const useSendMessage = (): UseSendMessage => {
4039
async ({ apiConfig, http, message, conversationId, replacements }: SendMessageProps) => {
4140
setIsLoading(true);
4241

43-
const timeoutId = setTimeout(() => {
44-
abortController.current.abort(i18n.FETCH_MESSAGE_TIMEOUT_ERROR);
45-
abortController.current = new AbortController();
46-
}, INVOKE_LLM_CLIENT_TIMEOUT);
47-
4842
try {
4943
return await fetchConnectorExecuteAction({
5044
conversationId,
@@ -62,7 +56,6 @@ export const useSendMessage = (): UseSendMessage => {
6256
},
6357
});
6458
} finally {
65-
clearTimeout(timeoutId);
6659
setIsLoading(false);
6760
}
6861
},

x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ describe('PackageInstaller', () => {
125125

126126
expect(populateIndexMock).toHaveBeenCalledTimes(1);
127127
expect(populateIndexMock).toHaveBeenCalledWith({
128+
elserInferenceId: defaultInferenceEndpoints.ELSER,
128129
indexName,
129130
archive: zipArchive,
130131
manifestVersion: TEST_FORMAT_VERSION,

x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/package_installer.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ export class PackageInstaller {
191191
archive: zipArchive,
192192
esClient: this.esClient,
193193
log: this.log,
194+
elserInferenceId: this.elserInferenceId,
194195
});
195196
await this.productDocClient.setInstallationSuccessful(productName, indexName);
196197

x-pack/platform/plugins/shared/ai_infra/product_doc_base/server/services/package_installer/steps/populate_index.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ export const populateIndex = async ({
1919
manifestVersion,
2020
archive,
2121
log,
22+
elserInferenceId,
2223
}: {
2324
esClient: ElasticsearchClient;
2425
indexName: string;
2526
manifestVersion: string;
2627
archive: ZipArchive;
2728
log: Logger;
29+
elserInferenceId?: string;
2830
}) => {
2931
log.debug(`Starting populating index ${indexName}`);
3032

@@ -36,7 +38,13 @@ export const populateIndex = async ({
3638
const entryPath = contentEntries[i];
3739
log.debug(`Indexing content for entry ${entryPath}`);
3840
const contentBuffer = await archive.getEntryContent(entryPath);
39-
await indexContentFile({ indexName, esClient, contentBuffer, legacySemanticText });
41+
await indexContentFile({
42+
indexName,
43+
esClient,
44+
contentBuffer,
45+
legacySemanticText,
46+
elserInferenceId,
47+
});
4048
}
4149

4250
log.debug(`Done populating index ${indexName}`);

x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ export function naturalLanguageToEsql<TToolOptions extends ToolOptions>({
6868
switchMap((documentationEvent) => {
6969
return askLlmToRespond({
7070
documentationRequest: {
71-
commands: documentationEvent.output.commands,
72-
functions: documentationEvent.output.functions,
71+
commands: documentationEvent.output?.commands,
72+
functions: documentationEvent.output?.functions,
7373
},
7474
});
7575
})

x-pack/solutions/security/plugins/elastic_assistant/server/__mocks__/data_clients.mock.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ const createKnowledgeBaseDataClientMock = () => {
9898
getRequiredKnowledgeBaseDocumentEntries: jest.fn(),
9999
getWriter: jest.fn().mockResolvedValue({ bulk: jest.fn() }),
100100
isInferenceEndpointExists: jest.fn(),
101-
getInferenceEndpointId: jest.fn(),
102101
isModelInstalled: jest.fn(),
103102
isSecurityLabsDocsLoaded: jest.fn(),
104103
isSetupAvailable: jest.fn(),

x-pack/solutions/security/plugins/elastic_assistant/server/ai_assistant_data_clients/knowledge_base/index.test.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,20 @@ describe('AIAssistantKnowledgeBaseDataClient', () => {
162162

163163
describe('isInferenceEndpointExists', () => {
164164
it('returns true when the model is fully allocated and started in ESS', async () => {
165+
esClientMock.inference.get.mockResolvedValue({
166+
endpoints: [
167+
{
168+
service_settings: {
169+
model_id: 'elser-id',
170+
deployment_id: ASSISTANT_ELSER_INFERENCE_ID,
171+
allocation_status: { state: 'fully_allocated' },
172+
},
173+
inference_id: ASSISTANT_ELSER_INFERENCE_ID,
174+
task_type: 'sparse_embedding',
175+
service: 'elasticsearch',
176+
},
177+
],
178+
});
165179
const client = new AIAssistantKnowledgeBaseDataClient(mockOptions);
166180
trainedModelsProviderMock.getTrainedModelsStats.mockResolvedValueOnce({
167181
trained_model_stats: [

0 commit comments

Comments
 (0)