diff --git a/Cargo.lock b/Cargo.lock index 0cf8e029b..d270bafee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -694,8 +694,9 @@ dependencies = [ [[package]] name = "async-openai" -version = "0.24.0" -source = "git+https://github.com/chirino/async-openai?branch=optional-fields#7b9fe4b17c9f1985a296a313adf9e641a5812163" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6db3286b4f52b6556ac5208fb575d035eca61a2bf40d7e75d1db2733ffc599f" dependencies = [ "async-convert", "backoff", @@ -1793,7 +1794,7 @@ dependencies = [ "cssparser-macros", "dtoa-short", "itoa", - "phf 0.10.1", + "phf 0.11.2", "smallvec", ] @@ -3811,8 +3812,8 @@ dependencies = [ [[package]] name = "langchain-rust" -version = "4.4.3" -source = "git+https://github.com/chirino/langchain-rust?branch=remove-extraneous-agent-msg#358471ccdfa8e39c65de2a3058ffe19f93c6bb78" +version = "4.6.0" +source = "git+https://github.com/chirino/langchain-rust?branch=main#96c615bd6db4284d17eb24f8e730304f8a6e2563" dependencies = [ "async-openai", "async-recursion", @@ -4526,7 +4527,7 @@ version = "5.0.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d385da3c602d29036d2f70beed71c36604df7570be17fed4c5b839616785bf" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "chrono", "getrandom", "http 1.1.0", @@ -5119,9 +5120,7 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" dependencies = [ - "phf_macros", "phf_shared 0.10.0", - "proc-macro-hack", ] [[package]] @@ -5130,6 +5129,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" dependencies = [ + "phf_macros", "phf_shared 0.11.2", ] @@ -5175,16 +5175,15 @@ dependencies = [ [[package]] name = "phf_macros" -version = "0.10.0" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", - "proc-macro-hack", + "phf_generator 0.11.2", + "phf_shared 0.11.2", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.79", ] [[package]] @@ -5460,12 +5459,6 @@ dependencies = [ "syn 2.0.79", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" version = "1.0.86" @@ -6789,7 +6782,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13261ee216b44d932ef93b2d4a75d45199bef77864bcc5b77ecfc7bc0ecb02d6" dependencies = [ "anyhow", - "base64 0.21.7", + "base64 0.22.1", "buffered-reader", "chrono", "dyn-clone", @@ -9465,7 +9458,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 9fe541725..ee8e2554e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ async-graphql = "7.0.5" async-graphql-actix-web = "7.0.5" async-std = "1" async-trait = "0.1.74" +async-openai = "0.25.0" base64 = "0.22" biscuit = "0.7" build-info = "0.0.38" @@ -76,7 +77,7 @@ itertools = "0.13" jsn = "0.14" json-merge-patch = "0.0.1" jsonpath-rust = "0.7.0" -langchain-rust = { version = "4.4.3" } +langchain-rust = { version = "4.6.0" } lenient_semver = "0.4.2" liblzma = "0.3" libz-sys = "*" @@ -194,8 +195,6 @@ postgresql_commands = { version = "0.16.3", default-features = false, features = # required due to https://github.com/voteblake/csaf-rs/pull/29 csaf = { git = "https://github.com/chirino/csaf-rs", rev = "414896904bc5e5287fd88b1daef5c27f70503d01" } -# to pickup up fix: https://github.com/64bit/async-openai/pull/263 and https://github.com/64bit/async-openai/pull/267 -# needed to work against groq.com API -async-openai = { git = "https://github.com/chirino/async-openai", branch = "optional-fields" } -# to pickup fix https://github.com/Abraxas-365/langchain-rust/pull/236 -langchain-rust = { git = "https://github.com/chirino/langchain-rust", branch = "remove-extraneous-agent-msg" } +# to pickup fix: https://github.com/Abraxas-365/langchain-rust/pull/246 +# and fix: https://github.com/Abraxas-365/langchain-rust/pull/250 +langchain-rust = { git = "https://github.com/chirino/langchain-rust", branch = "main" } diff --git a/modules/fundamental/src/advisory/service/test.rs b/modules/fundamental/src/advisory/service/test.rs index b9901d967..dc0d77b01 100644 --- a/modules/fundamental/src/advisory/service/test.rs +++ b/modules/fundamental/src/advisory/service/test.rs @@ -190,7 +190,25 @@ async fn single_advisory(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { let jenny384 = Id::sha384(&digests.sha384); let jenny512 = Id::sha512(&digests.sha512); let fetched = fetch.fetch_advisory(jenny256.clone(), ()).await?; + let id = Id::Uuid(fetched.as_ref().unwrap().head.uuid); + assert!(matches!( + fetched, + Some(AdvisoryDetails { + head: AdvisoryHead { .. }, + source_document: Some(SourceDocument { + sha256, + sha384, + sha512, + .. + }), + average_severity: Some(average_severity), + + .. + }) + if sha256 == jenny256.to_string() && sha384 == jenny384.to_string() && sha512 == jenny512.to_string() && average_severity == Severity::Critical)); + + let fetched = fetch.fetch_advisory(id, ()).await?; assert!(matches!( fetched, Some(AdvisoryDetails { diff --git a/modules/fundamental/src/ai/endpoints/expected_tools_result.json b/modules/fundamental/src/ai/endpoints/expected_tools_result.json new file mode 100644 index 000000000..561564c9a --- /dev/null +++ b/modules/fundamental/src/ai/endpoints/expected_tools_result.json @@ -0,0 +1,66 @@ +[ + { + "name": "cve-info", + "description": "This tool can be used to get information about a Vulnerability.\nA Vulnerability is known as a CVE.\n\nVulnerabilities are security issues that may affect software packages.\nVulnerabilities may affect multiple packages.\n\nVulnerability are identified by their CVE Identifier. Examples:\n* CVE-2014-0160\n\nThe input should be the partial name of the Vulnerability to search for.\nWhen the input is a full CVE ID, the tool will provide information about the vulnerability.\nWhen the input is a partial name, the tool will provide a list of possible matches.", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "This tool can be used to get information about a Vulnerability.\nA Vulnerability is known as a CVE.\n\nVulnerabilities are security issues that may affect software packages.\nVulnerabilities may affect multiple packages.\n\nVulnerability are identified by their CVE Identifier. Examples:\n* CVE-2014-0160\n\nThe input should be the partial name of the Vulnerability to search for.\nWhen the input is a full CVE ID, the tool will provide information about the vulnerability.\nWhen the input is a partial name, the tool will provide a list of possible matches." + } + }, + "required": [ + "input" + ] + } + }, + { + "name": "advisory-info", + "description": "This tool can be used to get information about an Advisory.\n\nAdvisories are notifications that a vulnerability affects a product or SBOM.\nAdvisories are issued by a vendor or security organization.\nUnless there is a specific advisory for a CVE, the CVE may or may not affect the product.\n\nAdvisories have a UUID that uniquely identifies the advisory. Example:\n* 2fd0d1b7-a908-4d63-9310-d57a7f77c6df\n\nThe input should be the UUID of the Advisory.", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "This tool can be used to get information about an Advisory.\n\nAdvisories are notifications that a vulnerability affects a product or SBOM.\nAdvisories are issued by a vendor or security organization.\nUnless there is a specific advisory for a CVE, the CVE may or may not affect the product.\n\nAdvisories have a UUID that uniquely identifies the advisory. Example:\n* 2fd0d1b7-a908-4d63-9310-d57a7f77c6df\n\nThe input should be the UUID of the Advisory." + } + }, + "required": [ + "input" + ] + } + }, + { + "name": "package-info", + "description": "This tool provides information about a Package, which has a name and version. Packages are identified by a URI or a UUID. Examples of URIs:\n\n* pkg://rpm/redhat/libsepol@3.5-1.el9?arch=ppc64le\n* pkg:maven/org.apache.maven.wagon/wagon-provider-api@3.5.1?type=jar\n\nExample of a UUID: 2fd0d1b7-a908-4d63-9310-d57a7f77c6df.\n\nInput: The package name, its Identifier URI, or UUID.", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "This tool provides information about a Package, which has a name and version. Packages are identified by a URI or a UUID. Examples of URIs:\n\n* pkg://rpm/redhat/libsepol@3.5-1.el9?arch=ppc64le\n* pkg:maven/org.apache.maven.wagon/wagon-provider-api@3.5.1?type=jar\n\nExample of a UUID: 2fd0d1b7-a908-4d63-9310-d57a7f77c6df.\n\nInput: The package name, its Identifier URI, or UUID." + } + }, + "required": [ + "input" + ] + } + }, + { + "name": "sbom-info", + "description": "This tool retrieves information about a Software Bill of Materials (SBOM). SBOMs are identified by SHA-256, SHA-384, SHA-512 hashes, or UUID URIs. Examples:\n\nsha256:315f7c672f6e4948ffcc6d5a2b30f269c767d6d7d6f41d82ae716b5a46e5a68e\nurn:uuid:2fd0d1b7-a908-4d63-9310-d57a7f77c6df\n\nThe tool provides a list of advisories/CVEs affecting the SBOM.\n\nInput: An SBOM identifier or a product name. A full SBOM name typically combines the product name and version (e.g., \"product-version\"). If a user specifies both, use the product name to find the best matching SBOM. For example, \"quarkus 3.2.11\" might correspond to \"quarkus-bom-3.2.11.Final-redhat-00001\".\n\nThe link field contains a URL for more information about the item.", + "parameters": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "This tool retrieves information about a Software Bill of Materials (SBOM). SBOMs are identified by SHA-256, SHA-384, SHA-512 hashes, or UUID URIs. Examples:\n\nsha256:315f7c672f6e4948ffcc6d5a2b30f269c767d6d7d6f41d82ae716b5a46e5a68e\nurn:uuid:2fd0d1b7-a908-4d63-9310-d57a7f77c6df\n\nThe tool provides a list of advisories/CVEs affecting the SBOM.\n\nInput: An SBOM identifier or a product name. A full SBOM name typically combines the product name and version (e.g., \"product-version\"). If a user specifies both, use the product name to find the best matching SBOM. For example, \"quarkus 3.2.11\" might correspond to \"quarkus-bom-3.2.11.Final-redhat-00001\".\n\nThe link field contains a URL for more information about the item." + } + }, + "required": [ + "input" + ] + } + } +] \ No newline at end of file diff --git a/modules/fundamental/src/ai/endpoints/test.rs b/modules/fundamental/src/ai/endpoints/test.rs index dc3870c79..ed0dda6a4 100644 --- a/modules/fundamental/src/ai/endpoints/test.rs +++ b/modules/fundamental/src/ai/endpoints/test.rs @@ -1,5 +1,5 @@ use crate::ai::model::ChatState; -use crate::ai::service::test::{ingest_fixtures, sanitize_uuid}; +use crate::ai::service::test::{ingest_fixtures, sanitize_uuid_field, sanitize_uuid_urn}; use crate::ai::service::AiService; use crate::test::caller; use actix_http::StatusCode; @@ -81,90 +81,11 @@ async fn tools(ctx: &TrustifyContext) -> anyhow::Result<()> { let result: serde_json::Value = actix_web::test::read_body_json(response).await; log::info!("result: {:?}", result); + let expected: serde_json::Value = + serde_json::from_str(include_str!("expected_tools_result.json"))?; assert_eq!( result, - json!([ - { - "name": "product-info", - "description": "This tool can be used to get information about a product.\nThe input should be the name of the product to search for.\nWhen the input is a full name, the tool will provide information about the product.\nWhen the input is a partial name, the tool will provide a list of possible matches.", - "parameters": { - "type": "object", - "properties": { - "input": { - "type": "string", - "description": "This tool can be used to get information about a product.\nThe input should be the name of the product to search for.\nWhen the input is a full name, the tool will provide information about the product.\nWhen the input is a partial name, the tool will provide a list of possible matches." - } - }, - "required": [ - "input" - ] - } - }, - { - "name": "cve-info", - "description": "This tool can be used to get information about a Vulnerability.\nThe input should be the partial name of the Vulnerability to search for.\nWhen the input is a full CVE ID, the tool will provide information about the vulnerability.\nWhen the input is a partial name, the tool will provide a list of possible matches.", - "parameters": { - "type": "object", - "properties": { - "input": { - "type": "string", - "description": "This tool can be used to get information about a Vulnerability.\nThe input should be the partial name of the Vulnerability to search for.\nWhen the input is a full CVE ID, the tool will provide information about the vulnerability.\nWhen the input is a partial name, the tool will provide a list of possible matches." - } - }, - "required": [ - "input" - ] - } - }, - { - "name": "advisory-info", - "description": "This tool can be used to get information about an Advisory.\nThe input should be the name of the Advisory to search for.\nWhen the input is a full name, the tool will provide information about the Advisory.\nWhen the input is a partial name, the tool will provide a list of possible matches.", - "parameters": { - "type": "object", - "properties": { - "input": { - "type": "string", - "description": "This tool can be used to get information about an Advisory.\nThe input should be the name of the Advisory to search for.\nWhen the input is a full name, the tool will provide information about the Advisory.\nWhen the input is a partial name, the tool will provide a list of possible matches." - } - }, - "required": [ - "input" - ] - } - }, - { - "name": "package-info", - "description": "This tool can be used to get information about a Package.\nThe input should be the name of the package, it's Identifier uri or internal UUID.", - "parameters": { - "type": "object", - "properties": { - "input": { - "type": "string", - "description": "This tool can be used to get information about a Package.\nThe input should be the name of the package, it's Identifier uri or internal UUID." - } - }, - "required": [ - "input" - ] - } - }, - { - "name": "sbom-info", - "description": "This tool can be used to get information about an SBOM.\nThe input should be the SBOM Identifier.", - "parameters": { - "type": "object", - "properties": { - "input": { - "type": "string", - "description": "This tool can be used to get information about an SBOM.\nThe input should be the SBOM Identifier." - } - }, - "required": [ - "input" - ] - } - } - ]), + expected, "result:\n{}", serde_json::to_string_pretty(&result)? ); @@ -181,7 +102,8 @@ async fn read_text(response: ServiceResponse) -> anyhow::Result { #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn tools_call(ctx: &TrustifyContext) -> anyhow::Result<()> { - ingest_fixtures(ctx).await?; + ctx.ingest_document("quarkus-bom-2.13.8.Final-redhat-00004.json") + .await?; let app = caller(ctx).await?; @@ -194,35 +116,43 @@ async fn tools_call(ctx: &TrustifyContext) -> anyhow::Result<()> { assert_eq!(response.status(), StatusCode::NOT_FOUND); let request = TestRequest::post() - .uri("/api/v1/ai/tools/product-info") - .set_json(json!("Trusted Profile Analyzer")) + .uri("/api/v1/ai/tools/sbom-info") + .set_json(json!("quarkus")) .to_request(); let response = app.call_service(request).await; log::debug!("Code: {}", response.status()); assert_eq!(response.status(), StatusCode::OK); - let result = sanitize_uuid(read_text(response).await?); + let result = sanitize_uuid_urn(sanitize_uuid_field(read_text(response).await?)); log::info!("result: {:?}", result); assert_eq!( result.trim(), r#" { - "items": [ - { - "name": "Trusted Profile Analyzer", - "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", - "vendor": "Red Hat", - "versions": [ - "37.17.9" - ] - } + "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "source_document_sha256": "sha256:5a370574a991aa42f7ecc5b7d88754b258f81c230a73bea247c0a6fcc6f608ab", + "name": "quarkus-bom", + "published": "2023-11-13T00:10:00Z", + "authors": [ + "Organization: Red Hat Product Security (secalert@redhat.com)" + ], + "labels": [ + [ + "source", + "TrustifyContext" + ], + [ + "type", + "spdx" + ] ], - "total": 1 + "advisories": [], + "link": "http://localhost:3000/sboms/urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" } "# - .trim() + .trim() ); Ok(()) diff --git a/modules/fundamental/src/ai/service/mod.rs b/modules/fundamental/src/ai/service/mod.rs index 91690cb94..547e68f13 100644 --- a/modules/fundamental/src/ai/service/mod.rs +++ b/modules/fundamental/src/ai/service/mod.rs @@ -130,23 +130,27 @@ impl AiService { let agent = OpenAiToolAgentBuilder::new() .prefix(PREFIX) .tools(&self.tools) - .options(ChainCallOptions::new().with_max_tokens(1000)) + .options( + ChainCallOptions::new() + .with_max_tokens(1000) + .with_temperature(0.0) + .with_seed(1000), + ) .build(llm) .map_err(Error::AgentError)?; let mut memory = SimpleMemory::new(); - let mut new_messages = 0; + let mut new_user_messages = Vec::new(); for chat_message in &request.messages { match &chat_message.internal_state { None => { - let m = Message::new_human_message(chat_message.content.clone()); - memory.add_message(m); - new_messages += 1; + new_user_messages + .push(Message::new_human_message(chat_message.content.clone())); } Some(internal_state) => { - if new_messages != 0 { + if !new_user_messages.is_empty() { return Err(Error::BadRequest( "message with internal_state found after messages without".to_string(), )); @@ -168,25 +172,38 @@ impl AiService { } } + let mut history = memory.messages(); + for message in &new_user_messages { + history.push(message.clone()); + } + + let last_message = new_user_messages + .pop() + .ok_or(Error::BadRequest("no new user messages".to_string()))?; + for message in new_user_messages { + memory.add_message(message); + } + let memory: Arc> = memory.into(); let executor = AgentExecutor::from_agent(agent).with_memory(memory.clone()); - let _answer = executor + let answer = executor .invoke(prompt_args! { - "input" => new_messages, + "input" => last_message.content.clone(), }) .await .map_err(Error::ChainError)?; + history.push(Message::new_ai_message(answer.clone())); let mut response = ChatState { messages: Vec::new(), }; let memory = memory.lock().await; - for message in memory.messages() { + for mut message in memory.messages() { if message.message_type == langchain_rust::schemas::MessageType::ToolMessage { - // skip tool messages for now... - continue; + // hide tool results from the user + message.content = "".to_string(); } let internal_state = match serde_json::to_vec(&message) { Ok(serialized) => { diff --git a/modules/fundamental/src/ai/service/prefix.txt b/modules/fundamental/src/ai/service/prefix.txt index 5ed86d8c5..14f3c1653 100644 --- a/modules/fundamental/src/ai/service/prefix.txt +++ b/modules/fundamental/src/ai/service/prefix.txt @@ -5,48 +5,18 @@ You are a friendly Assistant designed to be able to assist with a tasks like: * listing packages that are affected by a CVE. * answering questions about which product versions are affected by CVEs. -Vulnerabilities are security issues that may affect software packages. -Vulnerabilities may affect multiple packages. -Vulnerability are identified by their CVE Identifier. Examples: -* CVE-2014-0160 - -Packages have a name and version. -Packages are Identified by a uri. Examples: -* pkg://rpm/redhat/libsepol@3.5-1.el9?arch=ppc64le -* pkg:maven/org.apache.maven.wagon/wagon-provider-api@3.5.1?type=jar -Packages are assigned an internal UUID that should never be shared with the user. - -Products have multiple versions. Each version is defined by a SBOM. -Products are assigned an internal UUID that should never be shared with the user. -Products are names of Software Products. Examples: -* Red Hat Enterprise Linux -* RHEL -* Quay -* OpenShift - -A SBOM is a Software Bill of Materials. -A SBOM defines the packages that are part of the product. -SBOMs are assigned an internal UUID that should never be shared with the user. -SBOMs are identified by a uuid, sha256, or sha512 URI . Examples: -* urn:uuid:2fd0d1b7-a908-4d63-9310-d57a7f77c6df -* sha256:315f7c672f6e4948ffcc6d5a2b30f269c767d6d7d6f41d82ae716b5a46e5a68e -* sha384:5485cc9f7c1edcc9aa775a94c51847dc9483545db8a310adb7d450de71f7fae5fc9cb6c4e840d14b02c84fcd33987299 -* sha512:861844d6704e8573fec34d967e20bcfe6e043e8f50f4e6a9a4a8e9e26f6e34f723f9914fa1c4af62d4a689dc8d7f3bce16ee72aabf8b94af57262a6d441fa815 - -Advisories are notifications that a vulnerability affects a product. -Advisories are issued by a vendor or security organization. -Advisories use an Identifier and Issuer combination to uniquely identify a vulnerability. Example: CVE-2014-0160 from Red Hat Product Security. -Advisories are assigned an internal UUID that should never be shared with the user. - -Unless there is a specific advisory for a CVE, the CVE may or may not affect the product. +Format all responses as markdown. If the Assistant is asked to perform a task that it is not able to do, it will respond with: I don't know The Assistant is able to complete those tasks by exclusively using information from tool call responses. +When multiple matches are found, the Assistant will get more details about the matched items if there are less than 5 matches. + The Assistant provides short concise answers to questions does not hallucinate information. Always use the available tools to provide up-to-date information. -Do not talk about your knowledge cutoff. +Users think of Products and SBOMs interchangeably, SBOMs being about a specific product version. +Do not talk about your knowledge cutoff. diff --git a/modules/fundamental/src/ai/service/test.rs b/modules/fundamental/src/ai/service/test.rs index ee388023e..65dca61ea 100644 --- a/modules/fundamental/src/ai/service/test.rs +++ b/modules/fundamental/src/ai/service/test.rs @@ -40,7 +40,7 @@ pub async fn ingest_fixtures(ctx: &TrustifyContext) -> Result<(), anyhow::Error> Ok(()) } -pub fn sanitize_uuid(value: String) -> String { +pub fn sanitize_uuid_field(value: String) -> String { let re = regex::Regex::new(r#""uuid": "\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}\b""#).unwrap(); re.replace_all( value.as_str(), @@ -49,6 +49,15 @@ pub fn sanitize_uuid(value: String) -> String { .to_string() } +pub fn sanitize_uuid_urn(value: String) -> String { + let re = regex::Regex::new(r#"urn:uuid:\b[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}\b"#).unwrap(); + re.replace_all( + value.as_str(), + r#"urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"#, + ) + .to_string() +} + #[test_context(TrustifyContext)] #[test(actix_web::test)] async fn completions(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { diff --git a/modules/fundamental/src/ai/service/tools/advisory_info.rs b/modules/fundamental/src/ai/service/tools/advisory_info.rs index bc995e728..516ca2ba2 100644 --- a/modules/fundamental/src/ai/service/tools/advisory_info.rs +++ b/modules/fundamental/src/ai/service/tools/advisory_info.rs @@ -1,6 +1,5 @@ use crate::advisory::service::AdvisoryService; use crate::ai::service::tools; -use anyhow::anyhow; use async_trait::async_trait; use langchain_rust::tools::Tool; use serde::Serialize; @@ -24,9 +23,15 @@ impl Tool for AdvisoryInfo { String::from( r##" This tool can be used to get information about an Advisory. -The input should be the name of the Advisory to search for. -When the input is a full name, the tool will provide information about the Advisory. -When the input is a partial name, the tool will provide a list of possible matches. + +Advisories are notifications that a vulnerability affects a product or SBOM. +Advisories are issued by a vendor or security organization. +Unless there is a specific advisory for a CVE, the CVE may or may not affect the product. + +Advisories have a UUID that uniquely identifies the advisory. Example: +* 2fd0d1b7-a908-4d63-9310-d57a7f77c6df + +The input should be the UUID of the Advisory. "## .trim(), ) @@ -40,45 +45,51 @@ When the input is a partial name, the tool will provide a list of possible match .ok_or("Input should be a string")? .to_string(); - // search for possible matches - let results = service - .fetch_advisories( - Query { - q: input, - ..Default::default() - }, - Default::default(), - Deprecation::Ignore, - (), - ) - .await?; - - if results.items.is_empty() { - return Err(anyhow!("I don't know").into()); - } - - // let the caller know what the possible matches are - if results.items.len() > 1 { - #[derive(Serialize)] - struct Item { - identifier: String, - title: Option, + let item = match Uuid::parse_str(input.as_str()).ok() { + Some(x) => service.fetch_advisory(Id::Uuid(x), ()).await?, + None => { + // search for possible matches + let results = service + .fetch_advisories( + Query { + q: input.clone(), + ..Default::default() + }, + Default::default(), + Deprecation::Ignore, + (), + ) + .await?; + + if results.items.is_empty() { + return Ok(format!("Advisory '{input}' not found")); + } + + // let the caller know what the possible matches are + if results.items.len() > 1 { + #[derive(Serialize)] + struct Item { + identifier: String, + title: Option, + } + + let json = tools::paginated_to_json(results, |item| Item { + identifier: item.head.identifier.clone(), + title: item.head.title.clone(), + })?; + return Ok(format!("There are multiple that match:\n\n{}", json)); + } + + // let's show the details + service + .fetch_advisory(Id::Uuid(results.items[0].head.uuid), ()) + .await? } + }; - let json = tools::paginated_to_json(results, |item| Item { - identifier: item.head.identifier.clone(), - title: item.head.title.clone(), - })?; - return Ok(format!("There are multiple that match:\n\n{}", json)); - } - - // let's show the details - let item = match service - .fetch_advisory(Id::Uuid(results.items[0].head.uuid), ()) - .await? - { + let item = match item { Some(v) => v, - None => return Err(anyhow!("I don't know").into()), + None => return Ok(format!("Advisory '{input}' not found")), }; #[derive(Serialize)] diff --git a/modules/fundamental/src/ai/service/tools/cve_info.rs b/modules/fundamental/src/ai/service/tools/cve_info.rs index 5b8d825a5..155ce548c 100644 --- a/modules/fundamental/src/ai/service/tools/cve_info.rs +++ b/modules/fundamental/src/ai/service/tools/cve_info.rs @@ -1,6 +1,5 @@ use crate::ai::service::tools; use crate::vulnerability::service::VulnerabilityService; -use anyhow::anyhow; use async_trait::async_trait; use langchain_rust::tools::Tool; use serde::Serialize; @@ -24,6 +23,14 @@ impl Tool for CVEInfo { String::from( r##" This tool can be used to get information about a Vulnerability. +A Vulnerability is known as a CVE. + +Vulnerabilities are security issues that may affect software packages. +Vulnerabilities may affect multiple packages. + +Vulnerability are identified by their CVE Identifier. Examples: +* CVE-2014-0160 + The input should be the partial name of the Vulnerability to search for. When the input is a full CVE ID, the tool will provide information about the vulnerability. When the input is a partial name, the tool will provide a list of possible matches. @@ -60,7 +67,7 @@ When the input is a partial name, the tool will provide a list of possible match .await?; if results.items.is_empty() { - return Err(anyhow!("I don't know").into()); + return Ok(format!("Vulnerability '{input}' not found")); } // let the caller know what the possible matches are @@ -89,13 +96,14 @@ When the input is a partial name, the tool will provide a list of possible match { v } else { - return Err(anyhow!("I don't know").into()); + return Ok(format!("Vulnerability '{input}' not found")); } } }; #[derive(Serialize)] struct Item { + identifier: String, title: Option, description: Option, severity: Option, @@ -125,6 +133,7 @@ When the input is a partial name, the tool will provide a list of possible match }) .collect(); let json = tools::to_json(&Item { + identifier: item.head.identifier.clone(), title: item.head.title.clone(), description: item.head.description.clone(), severity: item.average_score, @@ -162,6 +171,7 @@ mod tests { "CVE-2021-32714", r#" { + "identifier": "CVE-2021-32714", "title": "Integer Overflow in Chunked Transfer-Encoding", "description": "hyper is an HTTP library for Rust. In versions prior to 0.14.10, hyper's HTTP server and client code had a flaw that could trigger an integer overflow when decoding chunk sizes that are too big. This allows possible data loss, or if combined with an upstream HTTP proxy that allows chunk sizes larger than hyper does, can result in \"request smuggling\" or \"desync attacks.\" The vulnerability is patched in version 0.14.10. Two possible workarounds exist. One may reject requests manually that contain a `Transfer-Encoding` header or ensure any upstream proxy rejects `Transfer-Encoding` chunk sizes greater than what fits in 64-bit unsigned integers.", "severity": 9.1, diff --git a/modules/fundamental/src/ai/service/tools/logger.rs b/modules/fundamental/src/ai/service/tools/logger.rs index b89118c21..4d9cf1dac 100644 --- a/modules/fundamental/src/ai/service/tools/logger.rs +++ b/modules/fundamental/src/ai/service/tools/logger.rs @@ -20,14 +20,14 @@ impl Tool for ToolLogger { } async fn call(&self, input: &str) -> Result> { - log::info!(" tool call: {}, input: {}", self.name(), input); + log::info!("tool call: {}, input: {}", self.name(), input); let result = self.0.call(input).await; match &result { Ok(result) => { - log::info!(" ok: {}", result); + log::info!(" result: {}", result); } Err(err) => { - log::info!(" err: {}", err); + log::info!(" err: {}", err); } } result diff --git a/modules/fundamental/src/ai/service/tools/mod.rs b/modules/fundamental/src/ai/service/tools/mod.rs index 9d15787da..7e5db8de1 100644 --- a/modules/fundamental/src/ai/service/tools/mod.rs +++ b/modules/fundamental/src/ai/service/tools/mod.rs @@ -3,9 +3,7 @@ use crate::ai::service::tools::advisory_info::AdvisoryInfo; use crate::ai::service::tools::cve_info::CVEInfo; use crate::ai::service::tools::logger::ToolLogger; use crate::ai::service::tools::package_info::PackageInfo; -use crate::ai::service::tools::product_info::ProductInfo; use crate::ai::service::tools::sbom_info::SbomInfo; -use crate::product::service::ProductService; use crate::purl::service::PurlService; use crate::sbom::service::SbomService; use crate::vulnerability::service::VulnerabilityService; @@ -25,10 +23,13 @@ pub mod sbom_info; pub fn new(db: Database) -> Vec> { vec![ - Arc::new(ToolLogger(ProductInfo(ProductService::new(db.clone())))), + // Arc::new(ToolLogger(ProductInfo(ProductService::new(db.clone())))), Arc::new(ToolLogger(CVEInfo(VulnerabilityService::new(db.clone())))), Arc::new(ToolLogger(AdvisoryInfo(AdvisoryService::new(db.clone())))), - Arc::new(ToolLogger(PackageInfo(PurlService::new(db.clone())))), + Arc::new(ToolLogger(PackageInfo(( + PurlService::new(db.clone()), + SbomService::new(db.clone()), + )))), Arc::new(ToolLogger(SbomInfo(SbomService::new(db.clone())))), ] } @@ -64,13 +65,13 @@ where #[cfg(test)] mod tests { use super::*; - use crate::ai::service::test::sanitize_uuid; + use crate::ai::service::test::{sanitize_uuid_field, sanitize_uuid_urn}; use langchain_rust::tools::Tool; use serde_json::Value; use std::rc::Rc; pub fn cleanup_tool_result(s: Result>) -> String { - sanitize_uuid(s.unwrap().trim().to_string()) + sanitize_uuid_urn(sanitize_uuid_field(s.unwrap().trim().to_string())) } pub async fn assert_tool_contains( diff --git a/modules/fundamental/src/ai/service/tools/package_info.rs b/modules/fundamental/src/ai/service/tools/package_info.rs index 68b6d860f..255810d13 100644 --- a/modules/fundamental/src/ai/service/tools/package_info.rs +++ b/modules/fundamental/src/ai/service/tools/package_info.rs @@ -1,6 +1,6 @@ use crate::ai::service::tools; use crate::purl::service::PurlService; -use anyhow::anyhow; +use crate::sbom::service::SbomService; use async_trait::async_trait; use langchain_rust::tools::Tool; use serde::Serialize; @@ -11,7 +11,7 @@ use trustify_common::purl::Purl; use trustify_module_ingestor::common::Deprecation; use uuid::Uuid; -pub struct PackageInfo(pub PurlService); +pub struct PackageInfo(pub (PurlService, SbomService)); #[async_trait] impl Tool for PackageInfo { @@ -22,15 +22,21 @@ impl Tool for PackageInfo { fn description(&self) -> String { String::from( r##" -This tool can be used to get information about a Package. -The input should be the name of the package, it's Identifier uri or internal UUID. +This tool provides information about a Package, which has a name and version. Packages are identified by a URI or a UUID. Examples of URIs: + +* pkg://rpm/redhat/libsepol@3.5-1.el9?arch=ppc64le +* pkg:maven/org.apache.maven.wagon/wagon-provider-api@3.5.1?type=jar + +Example of a UUID: 2fd0d1b7-a908-4d63-9310-d57a7f77c6df. + +Input: The package name, its Identifier URI, or UUID. "## - .trim(), + .trim(), ) } async fn run(&self, input: Value) -> Result> { - let service = &self.0; + let (service, sbom_service) = &self.0; let input = input .as_str() @@ -57,7 +63,7 @@ The input should be the name of the package, it's Identifier uri or internal UUI let results = service .purls( Query { - q: input, + q: input.clone(), ..Default::default() }, Default::default(), @@ -94,9 +100,13 @@ The input should be the name of the package, it's Identifier uri or internal UUI let item = match purl_details { Some(v) => v, - None => return Err(anyhow!("I don't know").into()), + None => return Ok(format!("Package '{input}' not found")), }; + let sboms = sbom_service + .find_related_sboms(item.head.uuid, Default::default(), Default::default(), ()) + .await?; + #[derive(Serialize)] struct Item { identifier: Purl, @@ -105,6 +115,13 @@ The input should be the name of the package, it's Identifier uri or internal UUI version: Option, advisories: Vec, licenses: Vec, + sboms: Vec, + } + + #[derive(Serialize)] + struct Sbom { + uuid: Uuid, + name: String, } #[derive(Serialize)] @@ -127,6 +144,14 @@ The input should be the name of the package, it's Identifier uri or internal UUI uuid: item.head.uuid, name: item.head.purl.name.clone(), version: item.head.purl.version.clone(), + sboms: sboms + .items + .iter() + .map(|sbom| Sbom { + uuid: sbom.head.id, + name: sbom.head.name.clone(), + }) + .collect(), advisories: item .advisories @@ -173,7 +198,10 @@ mod tests { ctx.ingest_document("quarkus-bom-2.13.8.Final-redhat-00004.json") .await?; - let tool = Rc::new(PackageInfo(PurlService::new(ctx.db.clone()))); + let tool = Rc::new(PackageInfo(( + PurlService::new(ctx.db.clone()), + SbomService::new(ctx.db.clone()), + ))); assert_tool_contains( tool.clone(), @@ -187,6 +215,12 @@ mod tests { "advisories": [], "licenses": [ "LGPLV2+" + ], + "sboms": [ + { + "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "name": "ubi9-container" + } ] } "#, @@ -205,6 +239,12 @@ mod tests { "advisories": [], "licenses": [ "LGPLV2+" + ], + "sboms": [ + { + "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "name": "ubi9-container" + } ] } "#, @@ -223,6 +263,12 @@ mod tests { "advisories": [], "licenses": [ "APACHE-2.0" + ], + "sboms": [ + { + "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "name": "quarkus-bom" + } ] } "#).await?; @@ -239,6 +285,12 @@ mod tests { "advisories": [], "licenses": [ "APACHE-2.0" + ], + "sboms": [ + { + "uuid": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "name": "quarkus-bom" + } ] } "#).await?; diff --git a/modules/fundamental/src/ai/service/tools/product_info.rs b/modules/fundamental/src/ai/service/tools/product_info.rs index edbe0c3da..f01d183c8 100644 --- a/modules/fundamental/src/ai/service/tools/product_info.rs +++ b/modules/fundamental/src/ai/service/tools/product_info.rs @@ -1,6 +1,5 @@ use crate::ai::service::tools; use crate::product::service::ProductService; -use anyhow::anyhow; use async_trait::async_trait; use langchain_rust::tools::Tool; use serde::Serialize; @@ -21,9 +20,16 @@ impl Tool for ProductInfo { String::from( r##" This tool can be used to get information about a product. + +Products have multiple versions. Each version is defined by a SBOM. +Products have a UUID that uniquely identifies the product. Example: 2fd0d1b7-a908-4d63-9310-d57a7f77c6df +Products are names of Software Products. Examples: +* Red Hat Enterprise Linux +* RHEL +* Quay +* OpenShift + The input should be the name of the product to search for. -When the input is a full name, the tool will provide information about the product. -When the input is a partial name, the tool will provide a list of possible matches. "## .trim(), ) @@ -39,7 +45,7 @@ When the input is a partial name, the tool will provide a list of possible match let results = service .fetch_products( Query { - q: input, + q: input.clone(), ..Default::default() }, Default::default(), @@ -48,7 +54,7 @@ When the input is a partial name, the tool will provide a list of possible match .await?; if results.items.is_empty() { - return Err(anyhow!("I don't know").into()); + return Ok(format!("Product '{input}' not found")); } #[derive(Serialize)] diff --git a/modules/fundamental/src/ai/service/tools/sbom_info.rs b/modules/fundamental/src/ai/service/tools/sbom_info.rs index b2b79d78f..b216e8ee2 100644 --- a/modules/fundamental/src/ai/service/tools/sbom_info.rs +++ b/modules/fundamental/src/ai/service/tools/sbom_info.rs @@ -1,6 +1,6 @@ use crate::ai::service::tools; use crate::sbom::service::SbomService; -use anyhow::anyhow; + use async_trait::async_trait; use itertools::Itertools; use langchain_rust::tools::Tool; @@ -24,10 +24,18 @@ impl Tool for SbomInfo { fn description(&self) -> String { String::from( r##" -This tool can be used to get information about an SBOM. -The input should be the SBOM Identifier. +This tool retrieves information about a Software Bill of Materials (SBOM). SBOMs are identified by SHA-256, SHA-384, SHA-512 hashes, or UUID URIs. Examples: + +sha256:315f7c672f6e4948ffcc6d5a2b30f269c767d6d7d6f41d82ae716b5a46e5a68e +urn:uuid:2fd0d1b7-a908-4d63-9310-d57a7f77c6df + +The tool provides a list of advisories/CVEs affecting the SBOM. + +Input: An SBOM identifier or a product name. A full SBOM name typically combines the product name and version (e.g., "product-version"). If a user specifies both, use the product name to find the best matching SBOM. For example, "quarkus 3.2.11" might correspond to "quarkus-bom-3.2.11.Final-redhat-00001". + +The link field contains a URL for more information about the item. "## - .trim(), + .trim(), ) } @@ -39,19 +47,31 @@ The input should be the SBOM Identifier. .ok_or("Input should be a string")? .to_string(); - // Try lookup as a UUID let mut sbom_details = match Id::from_str(input.as_str()) { Err(_) => None, - Ok(id) => service.fetch_sbom_details(id, ()).await?, + Ok(id) => { + log::info!("Fetching SBOM details by Id: {}", id); + service.fetch_sbom_details(id, ()).await? + } }; + if sbom_details.is_none() { + sbom_details = match Uuid::from_str(input.as_str()) { + Err(_) => None, + Ok(id) => { + log::info!("Fetching SBOM details by UUID: {}", id); + service.fetch_sbom_details(Id::Uuid(id), ()).await? + } + }; + } + // Fallback to search if sbom_details.is_none() { // try to search for possible matches let results = service .fetch_sboms( Query { - q: input, + q: input.clone(), ..Default::default() }, Default::default(), @@ -75,6 +95,7 @@ The input should be the SBOM Identifier. name: String, #[serde(with = "time::serde::rfc3339::option")] published: Option, + link: String, } let json = tools::paginated_to_json(results, |item| Item { @@ -86,6 +107,7 @@ The input should be the SBOM Identifier. .unwrap_or_default(), name: item.head.name.clone(), published: item.head.published, + link: format!("http://localhost:3000/sboms/urn:uuid:{}", item.head.id), })?; return Ok(format!("There are multiple that match:\n\n{}", json)); } @@ -94,7 +116,7 @@ The input should be the SBOM Identifier. let item = match sbom_details { Some(v) => v, - None => return Err(anyhow!("I don't know").into()), + None => return Ok(format!("SBOM '{input}' not found")), }; #[derive(Serialize)] @@ -107,6 +129,7 @@ The input should be the SBOM Identifier. authors: Vec, labels: Vec<(String, String)>, advisories: Vec, + link: String, } #[derive(Serialize)] @@ -114,12 +137,24 @@ The input should be the SBOM Identifier. uuid: Uuid, identifier: String, issuer: Option, + link: String, + vulnerabilities: Vec, + } + + #[derive(Serialize)] + struct Vulnerability { + identifier: String, + link: String, } let mut labels = item.summary.head.labels.iter().collect_vec(); labels.sort_by(|a, b| a.0.cmp(b.0)); tools::to_json(&Item { + link: format!( + "http://localhost:3000/sboms/urn:uuid:{}", + item.summary.head.id + ), uuid: item.summary.head.id, source_document_sha256: item .summary @@ -141,6 +176,21 @@ The input should be the SBOM Identifier. uuid: advisory.head.uuid, identifier: advisory.head.identifier.clone(), issuer: advisory.head.issuer.clone().map(|v| v.head.name.clone()), + link: format!( + "http://localhost:3000/advisory/urn:uuid:{}", + advisory.head.uuid + ), + vulnerabilities: advisory + .status + .iter() + .map(|v| Vulnerability { + identifier: v.vulnerability_id.clone(), + link: format!( + "http://localhost:3000/vulnerability/{}", + v.vulnerability_id + ), + }) + .collect(), }) .collect(), }) @@ -187,7 +237,8 @@ mod tests { "spdx" ] ], - "advisories": [] + "advisories": [], + "link": "http://localhost:3000/sboms/urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" } "#, ) diff --git a/modules/fundamental/src/sbom/service/test.rs b/modules/fundamental/src/sbom/service/test.rs index c9c1dab32..b0661a53d 100644 --- a/modules/fundamental/src/sbom/service/test.rs +++ b/modules/fundamental/src/sbom/service/test.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use test_context::test_context; use test_log::test; use trustify_common::db::Transactional; +use trustify_common::id::Id; use trustify_common::purl::Purl; use trustify_test_context::TrustifyContext; @@ -32,6 +33,12 @@ async fn sbom_details_status(ctx: &TrustifyContext) -> Result<(), anyhow::Error> log::debug!("{}", serde_json::to_string_pretty(&details)?); + let details = service + .fetch_sbom_details(Id::Uuid(details.summary.head.id), Transactional::None) + .await?; + + assert!(details.is_some()); + Ok(()) }