From eb87889934bbe693b6fa410a366a1e46ad136242 Mon Sep 17 00:00:00 2001 From: Marius Iversen Date: Wed, 27 Nov 2024 16:30:34 +0100 Subject: [PATCH] [SIEM Rule Migration] Minor prompt improvements (#201941) ## Summary Adds some minor improvements and modifies the format of some of the prompts used in the siem_migration process. A few specifics below: 1. Moved prebuilt rules matching to JSON output parsing, as it's less prone to issues on some models. 2. Moved some of the formatting of the prompts to focus on XML-like tags, as this often gives good results on multiple models. Also, context that is relevant to performing an action is moved to the system prompt, while context related to the question (and to the response) is kept in the human prompt. 3. Moved process_query to ChatPromptTemplate, and moved the collection of ResourceContext outside of the prompt because of this. --- .../siem_migrations/rules/task/agent/graph.ts | 2 +- .../match_prebuilt_rule.ts | 46 ++-- .../nodes/match_prebuilt_rule/prompts.ts | 38 +++- .../nodes/process_query/process_query.ts | 14 +- .../nodes/process_query/prompts.ts | 214 ++++++++++-------- .../nodes/retrieve_integrations/prompts.ts | 10 +- .../nodes/translate_rule/translate_rule.ts | 2 +- 7 files changed, 187 insertions(+), 139 deletions(-) diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts index 0ec705a9268dc..078b3ffdcdcb4 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts @@ -20,7 +20,7 @@ export function getRuleMigrationAgent({ connectorId, logger, }: MigrateRuleGraphParams) { - const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({ model, prebuiltRulesMap, logger }); + const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({ model, prebuiltRulesMap }); const 
translationSubGraph = getTranslateRuleGraph({ model, inferenceClient, diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts index 4a0404acf653d..056b41bf088a0 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts @@ -5,17 +5,19 @@ * 2.0. */ -import type { Logger } from '@kbn/core/server'; -import { StringOutputParser } from '@langchain/core/output_parsers'; +import { JsonOutputParser } from '@langchain/core/output_parsers'; import type { ChatModel } from '../../../util/actions_client_chat'; -import type { GraphNode } from '../../types'; import { filterPrebuiltRules, type PrebuiltRulesMapByName } from '../../../util/prebuilt_rules'; +import type { GraphNode } from '../../types'; import { MATCH_PREBUILT_RULE_PROMPT } from './prompts'; interface GetMatchPrebuiltRuleNodeParams { model: ChatModel; prebuiltRulesMap: PrebuiltRulesMapByName; - logger: Logger; +} + +interface GetMatchedRuleResponse { + match: string; } export const getMatchPrebuiltRuleNode = @@ -25,34 +27,32 @@ export const getMatchPrebuiltRuleNode = if (!mitreAttackIds?.length) { return {}; } + const filteredPrebuiltRulesMap = filterPrebuiltRules(prebuiltRulesMap, mitreAttackIds); if (filteredPrebuiltRulesMap.size === 0) { return {}; } - const outputParser = new StringOutputParser(); + const outputParser = new JsonOutputParser(); const matchPrebuiltRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser); - const elasticSecurityRules = Array(filteredPrebuiltRulesMap.keys()).join('\n'); - const response = await matchPrebuiltRule.invoke({ + const elasticSecurityRules = 
[...filteredPrebuiltRulesMap.keys()].join('\n'); + const response = (await matchPrebuiltRule.invoke({ elasticSecurityRules, ruleTitle: state.original_rule.title, - }); - const cleanResponse = response.trim(); - if (cleanResponse === 'no_match') { - return {}; - } - - const result = filteredPrebuiltRulesMap.get(cleanResponse); - if (result != null) { - return { - elastic_rule: { - title: result.rule.name, - description: result.rule.description, - prebuilt_rule_id: result.rule.rule_id, - id: result.installedRuleId, - }, - }; + })) as GetMatchedRuleResponse; + if (response.match) { + const result = filteredPrebuiltRulesMap.get(response.match); + if (result != null) { + return { + elastic_rule: { + title: result.rule.name, + description: result.rule.description, + prebuilt_rule_id: result.rule.rule_id, + id: result.installedRuleId, + }, + }; + } } return {}; diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts index 434636d0519b1..ab5d7383e27d4 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts @@ -11,25 +11,39 @@ export const MATCH_PREBUILT_RULE_PROMPT = ChatPromptTemplate.fromMessages([ 'system', `You are an expert assistant in Cybersecurity, your task is to help migrating a SIEM detection rule, from Splunk Security to Elastic Security. You will be provided with a Splunk Detection Rule name by the user, your goal is to try find an Elastic Detection Rule that covers the same threat, if any. -The list of Elastic Detection Rules suggested is provided in the context below. 
+Here are some context for you to reference for your task, read it carefully as you will get questions about it later: -Guidelines: -If there is no Elastic rule in the list that covers the same threat, answer only with the string: no_match -If there is one Elastic rule in the list that covers the same threat, answer only with its name without any further explanation. -If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation". - - + + {elasticSecurityRules} - + + `, ], [ 'human', - `The Splunk Detection Rule is: -<> + `See the below description of the relevant splunk rule and try to match it with any of the elastic detection rules with similar names. + {ruleTitle} -<> + + + +- Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name. Do not reply with anything else. +- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else. +- If there is one Elastic rule in the list that covers the same threat, set the name of the matching rule as a value of the match key. Do not reply with anything else. +- If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation". 
+ + + +U: +Linux Auditd Add User Account Type + +A: Please find the match JSON object below: +\`\`\`json +{{"match": "Linux User Account Creation"}} +\`\`\` + `, ], - ['ai', 'Please find the answer below:'], + ['ai', 'Please find the match JSON object below:'], ]); diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts index 0f90d74dafba3..97d4168f1283e 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts @@ -10,7 +10,7 @@ import { isEmpty } from 'lodash/fp'; import type { ChatModel } from '../../../../../util/actions_client_chat'; import type { RuleResourceRetriever } from '../../../../../util/rule_resource_retriever'; import type { GraphNode } from '../../types'; -import { getReplaceQueryResourcesPrompt } from './prompts'; +import { REPLACE_QUERY_RESOURCE_PROMPT, getResourcesContext } from './prompts'; interface GetProcessQueryNodeParams { model: ChatModel; @@ -25,9 +25,15 @@ export const getProcessQueryNode = ({ let query = state.original_rule.query; const resources = await resourceRetriever.getResources(state.original_rule); if (!isEmpty(resources)) { - const replaceQueryResourcesPrompt = getReplaceQueryResourcesPrompt(state, resources); - const stringParser = new StringOutputParser(); - query = await model.pipe(stringParser).invoke(replaceQueryResourcesPrompt); + const replaceQueryParser = new StringOutputParser(); + const replaceQueryResourcePrompt = + REPLACE_QUERY_RESOURCE_PROMPT.pipe(model).pipe(replaceQueryParser); + const resourceContext = getResourcesContext(resources); + 
query = await replaceQueryResourcePrompt.invoke({ + query: state.original_rule.query, + macros: resourceContext.macros, + lookup_tables: resourceContext.lists, + }); } return { inline_query: query }; }; diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/prompts.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/prompts.ts index 5d2e6648c1d85..f074da6b27d1a 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/prompts.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/prompts.ts @@ -5,119 +5,147 @@ * 2.0. */ +import { ChatPromptTemplate } from '@langchain/core/prompts'; import type { RuleMigrationResources } from '../../../../../util/rule_resource_retriever'; -import type { TranslateRuleState } from '../../types'; -const getResourcesContext = (resources: RuleMigrationResources): string => { - const resourcesContext = []; +interface ResourceContext { + macros: string; + lists: string; +} + +export const getResourcesContext = (resources: RuleMigrationResources): ResourceContext => { + const result: ResourceContext = { macros: '', lists: '' }; + + // Process macros if (resources.macro?.length) { - const macrosSummary = resources.macro - .map((macro) => `\`${macro.name}\`: ${macro.content}`) - .join('\n'); - resourcesContext.push('<>', macrosSummary, '<>'); + const macrosMap = resources.macro.reduce((acc, macro) => { + acc[macro.name] = macro.content; + return acc; + }, {} as Record); + + result.macros = JSON.stringify(macrosMap, null, 2); } + + // Process lists if (resources.list?.length) { - const lookupsSummary = resources.list - .map((list) => `lookup ${list.name}: ${list.content}`) - .join('\n'); - resourcesContext.push('<>', lookupsSummary, 
'<>'); + const listsMap = resources.list.reduce((acc, list) => { + acc[list.name] = list.content; + return acc; + }, {} as Record); + + result.lists = JSON.stringify(listsMap, null, 2); } - return resourcesContext.join('\n'); + + return result; }; -export const getReplaceQueryResourcesPrompt = ( - state: TranslateRuleState, - resources: RuleMigrationResources -): string => { - const resourcesContext = getResourcesContext(resources); - return `You are an agent expert in Splunk SPL (Search Processing Language). +export const REPLACE_QUERY_RESOURCE_PROMPT = ChatPromptTemplate.fromMessages([ + [ + 'system', + `You are an agent expert in Splunk SPL (Search Processing Language). Your task is to inline a set of macros and lookup tables syntax using their values in a SPL query. +Here are some context for you to reference for your task, read it carefully as you will get questions about it later: -# Guidelines -- You will be provided with a SPL query and also the resources reference with the values of macros and lookup tables. -- You have to replace the macros and lookup tables syntax in the SPL query and use their values inline, if provided. -- The original and modified queries must be equivalent. - -# Process -- Go through the SPL query and identify all the macros and lookup tables that are used. Two scenarios may arise: - - The macro or lookup table is provided in the resources: Replace it using its actual content. - - The macro or lookup table is not provided in the resources: Do not replace it, keep it in the query as it is. - -## Macros replacements + + -### Notes: +Always follow the below guidelines when replacing macros: - Macros names have the number of arguments in parentheses, e.g., \`macroName(2)\`. You must replace the correct macro accounting for the number of arguments. 
-### Example: - Having the following macros: - \`someSource\`: sourcetype="somesource" - \`searchTitle(1)\`: search title="$value$" - \`searchTitle\`: search title=* - \`searchType\`: search type=* - And the following SPL query: - \`\`\`spl - \`someSource\` \`someFilter\` - | \`searchTitle("sometitle")\` - | \`searchType("sometype")\` - | table * - \`\`\` - The correct replacement would be: - \`\`\`spl - sourcetype="somesource" \`someFilter\` - | search title="sometitle" - | \`searchType("sometype")\` - | table * - \`\`\` - -## Lookups replacements - -### Notes: +Having the following macros: + \`someSource\`: sourcetype="somesource" + \`searchTitle(1)\`: search title="$value$" + \`searchTitle\`: search title=* + \`searchType\`: search type=* +And the following SPL query: + \`\`\`spl + \`someSource\` \`someFilter\` + | \`searchTitle("sometitle")\` + | \`searchType("sometype")\` + | table * + \`\`\` +The correct replacement would be: + \`\`\`spl + sourcetype="somesource" \`someFilter\` + | search title="sometitle" + | \`searchType("sometype")\` + | table * + \`\`\` + + + +Always follow the below guidelines when replacing lookup tables: - OUTPUTNEW and OUTPUT fields should be replaced with the values from the lookup table. - Use the \`case\` function to evaluate conditions in the same order provided by the lookup table. - Ensure all lookup matching fields are correctly matched to their respective case conditions. - If there are more than one field to match, use the \`AND\` operator to combine them inside the \`case\` function. - The transformed SPL query should function equivalently to the original query with the \`lookup\` command. -### Example: - Having the following lookup table: - uid,username,department - 1066,Claudia Garcia,Engineering - 1690,Rutherford Sullivan,Engineering - 1815,Vanya Patel,IT - 1862,Wei Zhang,Engineering - 1916,Alex Martin,Personnel - And the following SPL query: - \`\`\`spl - ... 
| lookup users uid OUTPUTNEW username, department - \`\`\` - The correct replacement would be: - \`\`\`spl - ... | eval username=case(uid=1066, "Claudia Garcia", - uid=1690, "Rutherford Sullivan", - uid=1815, "Vanya Patel", - uid=1862, "Wei Zhang", - uid=1916, "Alex Martin", - true, null), - department=case(uid=1066, "Engineering", - uid=1690, "Engineering", - uid=1815, "IT", - uid=1862, "Engineering", - uid=1916, "Personnel", - true, null) - \`\`\` - - -## Important: You must respond only with the modified query inside a \`\`\`spl code block, nothing else. - -# Find the macros and lookup tables below: - -${resourcesContext} - -# Find the SPL query below: +Having the following lookup table: + uid,username,department + 1066,Claudia Garcia,Engineering + 1690,Rutherford Sullivan,Engineering + 1815,Vanya Patel,IT + 1862,Wei Zhang,Engineering + 1916,Alex Martin,Personnel +And the following SPL query: + \`\`\`spl + ... | lookup users uid OUTPUTNEW username, department + \`\`\` +The correct replacement would be: + \`\`\`spl + ... | eval username=case(uid=1066, "Claudia Garcia", + uid=1690, "Rutherford Sullivan", + uid=1815, "Vanya Patel", + uid=1862, "Wei Zhang", + uid=1916, "Alex Martin", + true, null), + department=case(uid=1066, "Engineering", + uid=1690, "Engineering", + uid=1815, "IT", + uid=1862, "Engineering", + uid=1916, "Personnel", + true, null) + \`\`\` + +`, + ], + [ + 'human', + `Go through the SPL query and identify all the macros and lookup tables that are used. + +{macros} + + +{lookup_tables} + + + \`\`\`spl -${state.original_rule.query} +{query} \`\`\` + + +Divide the query up into separate section and go through each section one at a time to identify the macros and lookup tables used that need to be replaced using one of two scenarios: +- The macro or lookup table is provided in the resources: Replace it using its actual content. +- The macro or lookup table is not provided in the resources: Do not replace it, keep it in the query as it is. 
-`; -}; + +- You will be provided with a SPL query and also the resources reference with the values of macros and lookup tables. +- You have to replace the macros and lookup tables syntax in the SPL query and use their values inline, if provided. +- The original and modified queries must be equivalent. +- You must respond only with the modified query inside a \`\`\`spl code block, nothing else similar to the example response below. + + + +A: Please find the modified SPL query below: +\`\`\`json +{{"match": "Linux User Account Creation"}} +\`\`\` + + +`, + ], + ['ai', 'Please find the modified SPL query below:'], +]); diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/retrieve_integrations/prompts.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/retrieve_integrations/prompts.ts index 4d15ad71d6794..962de190acd02 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/retrieve_integrations/prompts.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/retrieve_integrations/prompts.ts @@ -11,9 +11,9 @@ export const CREATE_SEMANTIC_QUERY_PROMPT = ChatPromptTemplate.fromMessages([ [ 'system', `You are a helpful assistant that helps in translating provided titles, descriptions and data sources into a single summary of keywords specifically crafted to be used as a semantic search query, which are usually short and includes keywords that are valid for the usecase. - The data provided are collected from SIEM detection rules, and it is trying to match the description of a list of data sources, so provide good keywords that match this usecase. - Try to also detect what sort of vendor, solution or technology is required and add these as keywords as well. 
- Some examples would be to identify if its cloud, which vendor, network, host, endpoint, etc.`, +The data provided are collected from SIEM detection rules, and it is trying to match the description of a list of data sources, so provide good keywords that match this usecase. +Try to also detect what sort of vendor, solution or technology is required and add these as keywords as well. +Some examples would be to identify if its cloud, which vendor, network, host, endpoint, etc.`, ], [ 'human', @@ -32,7 +32,7 @@ Go through the relevant title, description and data sources from the above query - Always reply with a JSON object with the key "query" and the value as the semantic search query inside three backticks as shown in the below example. - + U: Title: Processes created by netsh Description: This search looks for processes launching netsh.exe to execute various commands via the netsh command-line utility. Netsh.exe is a command-line scripting utility that allows you to, either locally or remotely, display or modify the network configuration of a computer that is currently running. Netsh can be used as a persistence proxy technique to execute a helper .dll when netsh.exe is executed. In this search, we are looking for processes spawned by netsh.exe that are executing commands via the command line. Deprecated because we have another detection of the same type. 
@@ -42,7 +42,7 @@ A: Please find the query keywords JSON object below: \`\`\`json {{"query": "windows host endpoint netsh.exe process creation command-line utility network configuration persistence proxy dll execution sysmon event id 1"}} \`\`\` -`, +`, ], ['ai', 'Please find the query keywords JSON object below:'], ]); diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/translate_rule/translate_rule.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/translate_rule/translate_rule.ts index 3fcd968b55650..a39e7c10146c0 100644 --- a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/translate_rule/translate_rule.ts +++ b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/translate_rule/translate_rule.ts @@ -34,7 +34,7 @@ export const getTranslateRuleNode = ({ ); const integrationIds = state.integrations.map((integration) => integration.id); - const prompt = getEsqlTranslationPrompt(state, indexPatterns.join(' ')); + const prompt = getEsqlTranslationPrompt(state, indexPatterns.join(',')); const response = await esqlKnowledgeBaseCaller(prompt); const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';