diff --git a/Cargo.lock b/Cargo.lock index 4f1ea22f..e5448649 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,6 +399,7 @@ dependencies = [ "cfg-if", "getrandom", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -1182,6 +1183,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecount" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + [[package]] name = "byteorder" version = "1.5.0" @@ -1863,6 +1870,31 @@ dependencies = [ "serde", ] +[[package]] +name = "cyclonedx-bom" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a114dd99ed051f1481d8d35acd455f77469f026b783fe08074763fdf1701506" +dependencies = [ + "base64 0.21.7", + "cyclonedx-bom-macros", + "fluent-uri 0.1.4", + "indexmap 2.6.0", + "jsonschema", + "once_cell", + "ordered-float 4.3.0", + "packageurl", + "regex", + "serde", + "serde_json", + "spdx", + "strum 0.26.3", + "thiserror", + "time", + "uuid", + "xml-rs", +] + [[package]] name = "cyclonedx-bom" version = "0.7.0" @@ -2354,6 +2386,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" dependencies = [ "log", + "regex", ] [[package]] @@ -2365,6 +2398,7 @@ dependencies = [ "anstream", "anstyle", "env_filter", + "humantime", "log", ] @@ -2433,6 +2467,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fancy-regex" version = "0.12.0" @@ -2582,6 +2626,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "funty" version = "2.0.0" @@ -3673,6 +3727,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "iso8601" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924e5d73ea28f59011fec52a0d12185d496a9b075d360657aed2a5707f701153" +dependencies = [ + "nom", +] + [[package]] name = "itertools" version = "0.10.5" @@ -3765,6 +3828,34 @@ dependencies = [ "thiserror", ] +[[package]] +name = "jsonschema" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" +dependencies = [ + "ahash 0.8.11", + "anyhow", + "base64 0.21.7", + "bytecount", + "fancy-regex 0.11.0", + "fraction", + "getrandom", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot 0.12.3", + "percent-encoding", + "regex", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + [[package]] name = "lalrpop" version = "0.20.2" @@ -4397,6 +4488,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -4424,6 +4529,21 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -4460,6 +4580,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -6418,7 +6549,7 @@ dependencies = [ "bytes", "chrono", "csv", - "cyclonedx-bom", + "cyclonedx-bom 0.7.0", "digest", "filetime", "fluent-uri 0.2.0", @@ -6440,6 +6571,27 @@ dependencies = [ "walker-common", ] +[[package]] +name = "sbomsleuth" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9bdf732fb14c2e4914aaa154b909968c3beabb4dccd005e09dfab8e06a9617a" +dependencies = [ + "anyhow", + "colored", + "cyclonedx-bom 0.6.2", + "env_logger", + "log", + "reqwest 0.12.8", + "sbom-walker", + "serde", + "serde_json", + "spdx-rs", + "tokio", + "walker-common", + "walker-extras", +] + [[package]] name = "schannel" version = "0.1.24" @@ -7955,7 +8107,7 @@ dependencies = [ "anyhow", "base64 0.21.7", "bstr", - "fancy-regex", + "fancy-regex 0.12.0", "lazy_static", "parking_lot 0.12.3", "rustc-hash 1.1.0", @@ -8490,7 +8642,7 @@ dependencies = [ "chrono", "criterion", "csaf", - "cyclonedx-bom", + "cyclonedx-bom 0.7.0", "hex", "humantime", "jsonpath-rust", @@ -8535,7 +8687,7 @@ dependencies = [ "criterion", "csaf", "cve", - "cyclonedx-bom", + "cyclonedx-bom 0.7.0", "futures-util", "hex", "humantime", @@ -8660,7 +8812,7 @@ dependencies = [ "cpe", "csaf", "cve", - "cyclonedx-bom", + "cyclonedx-bom 0.7.0", "hex", "humantime", "jsn", @@ -8675,6 +8827,7 @@ dependencies = [ "roxmltree", "rstest", "sbom-walker", + "sbomsleuth", "sea-orm", "sea-query", "semver", @@ -9221,6 +9374,7 @@ dependencies = [ "bytes", "bzip2", "chrono", + "clap", "csv", "digest", "filetime", @@ -9250,6 +9404,26 @@ dependencies = [ "xattr", ] +[[package]] +name = "walker-extras" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "451ae6af73f431c5c002d8961ad9b28eaf8411f983b32cf3eceb67022cf8b15f" +dependencies = [ + "anyhow", + "async-trait", + "bytes", + "clap", + "csaf-walker", + "humantime", + "log", + "reqwest 0.12.8", + "sbom-walker", + "thiserror", + "tokio", + "walker-common", +] + [[package]] name = "want" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 14cc5fe2..de84c412 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,6 @@ publish = false license = "Apache-2.0" [workspace.dependencies] -actix = "0.13.3" actix-cors = "0.7" actix-http = "3.3.1" actix-tls = "3" @@ -40,13 +39,11 @@ actix-web-static-files = "4.0.1" anyhow = "1.0.72" async-graphql = "7.0.5" async-graphql-actix-web = "7.0.5" -async-std = "1" async-trait = "0.1.74" base64 = "0.22" biscuit = "0.7" build-info = "0.0.38" build-info-build = "0.0.38" -build-info-common = "0.0.38" bytes = "1.5" bytesize = "1.3" criterion = "0.5.1" @@ -58,19 +55,15 @@ csaf = { version = "0.5.0", default-features = false } csaf-walker = { version = "0.9.0", default-features = false } cve = "0.3.1" cyclonedx-bom = "0.7.0" -env_logger = "0.11.0" futures = "0.3.30" futures-util = "0.3" garage-door = "0.1.1" git2 = { version = "0.19.0", features = ["ssh"] } hex = "0.4.3" -hide = "0.1.5" http = "1" human-date-parser = "0.2" humantime = "2" humantime-serde = "1" -indicatif = "0.17.8" -indicatif-log-bridge = "0.2" itertools = "0.13" jsn = "0.14" json-merge-patch = "0.0.1" @@ -83,7 +76,6 @@ log = "0.4.19" mime = "0.3.17" native-tls = "0.2" nu-ansi-term = "0.46" -once_cell = "1.19.0" openid = "0.15" openssl = "0.10" opentelemetry = "0.24" @@ -107,6 +99,7 @@ ring = "0.17.8" roxmltree = "0.20.0" rstest = "0.22" rust-s3 = "0.35" +sbomsleuth = { version = "0.1.9"} sbom-walker = { version = "0.9.0", default-features = false, features = ["crypto-openssl", "cyclonedx-bom", "spdx-rs"] } schemars = "0.8" sea-orm = "~1.0" # See https://www.sea-ql.org/blog/2024-08-04-sea-orm-1.0/#release-planning @@ -121,7 +114,6 @@ spdx = "0.10.6" spdx-expression = "0.5.2" spdx-rs = "0.5.3" sqlx = "0.7" -static-files = "0.2.3" strum = "0.26.3" temp-env = "0.3" tempfile = "3" @@ -130,10 +122,8 @@ test-log = "0.2.16" thiserror = "1.0.58" time = "0.3" tokio = "1.30.0" -tokio-stream = "0.1.15" tokio-util = "0.7" tracing = "0.1" -tracing-bunyan-formatter = "0.3.7" # Note: This uses OTEL 0.24 https://crates.io/crates/tracing-opentelemetry/0.25.0/dependencies tracing-opentelemetry = "0.25" tracing-subscriber = { version = "0.3.18", default-features = false } @@ -146,7 +136,6 @@ utoipa-swagger-ui = "7.1.0" uuid = "1.7.0" walkdir = "2.5" walker-common = "0.9.3" -walker-extras = "0.9.0" zip = "2.2.0" trustify-auth = { path = "common/auth", features = ["actix", "swagger"] } @@ -165,12 +154,6 @@ trustify-module-storage = { path = "modules/storage" } trustify-module-graphql = { path = "modules/graphql" } trustify-test-context = { path = "test-context" } trustify-module-analysis = { path = "modules/analysis" } - -# These dependencies are active during both the build time and the run time. So they are normal dependencies -# as well as build-dependencies. However, we can't control feature flags for build dependencies the way we do -# it for normal dependencies. So enabling the vendor feature for openssl-sys doesn't work for the build-dependencies. -# This will fail the build on targets where we need vendoring for openssl. Using rustls instead works around this issue. -postgresql_archive = { version = "0.16.3", default-features = false, features = ["theseus", "rustls-tls"] } postgresql_embedded = { version = "0.16.3", default-features = false, features = ["theseus", "rustls-tls"] } postgresql_commands = { version = "0.16.3", default-features = false, features = ["tokio"] } diff --git a/entity/src/source_document.rs b/entity/src/source_document.rs index 447e9b1b..3103099d 100644 --- a/entity/src/source_document.rs +++ b/entity/src/source_document.rs @@ -1,4 +1,5 @@ use sea_orm::entity::prelude::*; +use sea_orm::JsonValue; #[derive(Clone, Debug, PartialEq, Eq, DeriveEntityModel)] #[sea_orm(table_name = "source_document")] @@ -8,6 +9,8 @@ pub struct Model { pub sha256: String, pub sha384: String, pub sha512: String, + #[sea_orm(column_type = "JsonBinary")] + pub meta: JsonValue, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 1d22f26e..3002cf05 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -83,6 +83,7 @@ mod m0000631_alter_product_cpe_key; mod m0000640_create_product_status; mod m0000650_alter_advisory_tracking; mod m0000660_purl_id_indexes; +mod m0000680_add_meta_report_source_document; pub struct Migrator; @@ -173,6 +174,7 @@ impl MigratorTrait for Migrator { Box::new(m0000640_create_product_status::Migration), Box::new(m0000650_alter_advisory_tracking::Migration), Box::new(m0000660_purl_id_indexes::Migration), + Box::new(m0000680_add_meta_report_source_document::Migration), ] } } diff --git a/migration/src/m0000680_add_meta_report_source_document.rs b/migration/src/m0000680_add_meta_report_source_document.rs new file mode 100644 index 00000000..321d725b --- /dev/null +++ b/migration/src/m0000680_add_meta_report_source_document.rs @@ -0,0 +1,41 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + //create jsonb meta column + manager + .alter_table( + Table::alter() + .table(SourceDocument::Table) + .add_column(ColumnDef::new(SourceDocument::Meta).json_binary()) + .to_owned(), + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Drop jsonb meta column + manager + .alter_table( + Table::alter() + .table(SourceDocument::Table) + .drop_column(SourceDocument::Meta) + .to_owned(), + ) + .await?; + + Ok(()) + } +} + +#[derive(DeriveIden)] +enum SourceDocument { + Table, + Meta, +} diff --git a/modules/ingestor/Cargo.toml b/modules/ingestor/Cargo.toml index 491c53f4..164dbb9c 100644 --- a/modules/ingestor/Cargo.toml +++ b/modules/ingestor/Cargo.toml @@ -12,6 +12,7 @@ trustify-entity = { workspace = true } trustify-module-storage = { workspace = true } trustify-module-analysis = { workspace = true } + actix-web = { workspace = true } anyhow = { workspace = true } bytes = { workspace = true } @@ -30,6 +31,7 @@ packageurl = { workspace = true } parking_lot = { workspace = true } quick-xml = { workspace = true } roxmltree = { workspace = true } +sbomsleuth = { workspace = true } sbom-walker = { workspace = true } sea-orm = { workspace = true } sea-query = { workspace = true } diff --git a/modules/ingestor/src/graph/advisory/mod.rs b/modules/ingestor/src/graph/advisory/mod.rs index e3e19695..e4db28f8 100644 --- a/modules/ingestor/src/graph/advisory/mod.rs +++ b/modules/ingestor/src/graph/advisory/mod.rs @@ -11,6 +11,7 @@ use sea_orm::{ }; use sea_query::{Condition, JoinType, OnConflict}; use semver::Version; +use serde_json::json; use std::fmt::{Debug, Formatter}; use time::OffsetDateTime; use tracing::instrument; @@ -141,6 +142,7 @@ impl Graph { sha256: Set(sha256), sha384: Set(digests.sha384.encode_hex()), sha512: Set(digests.sha512.encode_hex()), + meta: Set(json!({})), // no sbom to report on }; let doc = doc_model.insert(&self.connection(&tx)).await?; diff --git a/modules/ingestor/src/graph/sbom/mod.rs b/modules/ingestor/src/graph/sbom/mod.rs index cc5744d9..1254e15d 100644 --- a/modules/ingestor/src/graph/sbom/mod.rs +++ b/modules/ingestor/src/graph/sbom/mod.rs @@ -21,6 +21,7 @@ use crate::{ use cpe::uri::OwnedUri; use entity::{product, product_version}; use hex::ToHex; +use sbomsleuth::report::Report; use sea_orm::{ prelude::Uuid, ActiveModelTrait, ColumnTrait, EntityTrait, ModelTrait, QueryFilter, QuerySelect, QueryTrait, RelationTrait, Select, SelectColumns, Set, @@ -28,6 +29,7 @@ use sea_orm::{ use sea_query::{ extension::postgres::PgExpr, Alias, Condition, Expr, Func, JoinType, Query, SimpleExpr, }; +use serde_json::json; use std::{ fmt::{Debug, Formatter}, iter, @@ -96,6 +98,70 @@ impl Graph { .map(|sbom| SbomContext::new(self, sbom))) } + #[instrument(skip(tx, info), err(level=tracing::Level::INFO))] + pub async fn ingest_sbom_with_report>( + &self, + report: &Report, + labels: impl Into + Debug, + digests: &Digests, + document_id: &str, + info: impl Into, + tx: TX, + ) -> Result { + let sha256 = digests.sha256.encode_hex::(); + + if let Some(found) = self.get_sbom_by_digest(&sha256, &tx).await? { + return Ok(found); + } + + let SbomInformation { + node_id, + name, + published, + authors, + } = info.into(); + + let connection = self.db.connection(&tx); + + let sbom_id = Uuid::now_v7(); + + let meta_string = serde_json::to_string(report)?; + let meta_value: serde_json::Value = meta_string.parse()?; + let doc_model = source_document::ActiveModel { + id: Default::default(), + sha256: Set(sha256), + sha384: Set(digests.sha384.encode_hex()), + sha512: Set(digests.sha512.encode_hex()), + meta: Set(meta_value), + }; + + let doc = doc_model.insert(&connection).await?; + + let model = sbom::ActiveModel { + sbom_id: Set(sbom_id), + node_id: Set(node_id.clone()), + + document_id: Set(document_id.to_string()), + + published: Set(published), + authors: Set(authors), + + source_document_id: Set(Some(doc.id)), + labels: Set(labels.into()), + }; + + let node_model = sbom_node::ActiveModel { + sbom_id: Set(sbom_id), + node_id: Set(node_id), + name: Set(name), + }; + + let result = model.insert(&connection).await?; + node_model.insert(&connection).await?; + + Ok(SbomContext::new(self, result)) + } + #[instrument(skip(tx, info), err(level=tracing::Level::INFO))] pub async fn ingest_sbom>( &self, @@ -127,6 +193,7 @@ impl Graph { sha256: Set(sha256), sha384: Set(digests.sha384.encode_hex()), sha512: Set(digests.sha512.encode_hex()), + meta: Set(json!({})), // Set to an empty JSON object }; let doc = doc_model.insert(&connection).await?; diff --git a/modules/ingestor/src/service/sbom/spdx.rs b/modules/ingestor/src/service/sbom/spdx.rs index 7e52f953..47573562 100644 --- a/modules/ingestor/src/service/sbom/spdx.rs +++ b/modules/ingestor/src/service/sbom/spdx.rs @@ -6,6 +6,8 @@ use crate::{ model::IngestResult, service::{Error, Warnings}, }; +use sbomsleuth::license::Licenses; +use sbomsleuth::report::Report; use serde_json::Value; use tracing::instrument; use trustify_common::{hashing::Digests, id::Id}; @@ -31,6 +33,28 @@ impl<'g> SpdxLoader<'g> { let (spdx, _) = parse_spdx(&warnings, json)?; + let license_instance = Licenses::default(); + let licenses_result = license_instance.run_with_spdx(spdx.clone()).await; + let licenses = match licenses_result { + Ok(licenses) => licenses, + Err(e) => { + log::warn!("Failed to generate spdx license report, {}", e.as_str()); + Licenses::default() + } + }; + let report_instance = sbomsleuth::report::Report { + licenses, + ..Default::default() + }; + let report_result = report_instance.run_with_spdx(spdx.clone()); + let report = match report_result { + Ok(report) => report, + Err(e) => { + log::warn!("Failed to generate spdx quality report, {}", e.as_str()); + Report::default() + } + }; + log::info!( "Storing: {}", spdx.document_creation_information.document_name @@ -47,7 +71,14 @@ impl<'g> SpdxLoader<'g> { let sbom = self .graph - .ingest_sbom(labels, digests, &document_id, spdx::Information(&spdx), &tx) + .ingest_sbom_with_report( + &report, + labels, + digests, + &document_id, + spdx::Information(&spdx), + &tx, + ) .await?; sbom.ingest_spdx(spdx, &warnings, &tx).await?;