Skip to content

Commit

Permalink
Merge pull request #560 from DataDog/jf/STAL-2917
Browse files Browse the repository at this point in the history
[STAL-2917] Fix PKU-related segfaults that prevent upgrading to the latest `deno_core`.
  • Loading branch information
jasonforal authored Nov 25, 2024
2 parents 1062a4f + 8a1c8df commit c0ad97d
Show file tree
Hide file tree
Showing 40 changed files with 610 additions and 380 deletions.
25 changes: 22 additions & 3 deletions crates/bins/src/bin/datadog-static-analyzer-git-hook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ use common::model::diff_aware::DiffAware;
use getopts::Options;
use git2::Repository;
use itertools::Itertools;
use kernel::analysis::analyze::analyze;
use kernel::analysis::analyze::analyze_with;
use kernel::analysis::ddsa_lib::v8_platform::initialize_v8;
use kernel::analysis::ddsa_lib::JsRuntime;
use kernel::constants::{CARGO_VERSION, VERSION};
use kernel::model::common::OutputFormat::Json;
use kernel::model::config_file::{ConfigFile, ConfigMethod, PathConfig};
Expand All @@ -29,6 +31,7 @@ use rayon::prelude::*;
use rocket::yansi::Paint;
use secrets::scanner::{build_sds_scanner, find_secrets};
use secrets::secret_files::should_ignore_file_for_secret;
use std::cell::Cell;
use std::collections::HashMap;
use std::io::Write;
use std::path::PathBuf;
Expand Down Expand Up @@ -438,6 +441,8 @@ fn main() -> Result<()> {
);
}

let v8 = initialize_v8(num_cpus as u32);

let analysis_start_instant = Instant::now();

// static analysis part
Expand All @@ -459,6 +464,11 @@ fn main() -> Result<()> {
let rule_results: Vec<RuleResult> = files_for_language
.into_par_iter()
.flat_map(|path| {
thread_local! {
// Per-thread slot for a `JsRuntime`, initially empty (`None`).
// The enclosing closure takes the value out with `replace(None)`, creates the runtime on
// first use via `get_or_insert_with`, and stores it back with `replace(opt)` after the
// file has been analyzed.
// (`Cell` is used to allow lazy instantiation of a thread local with zero runtime cost).
static JS_RUNTIME: Cell<Option<JsRuntime>> = const { Cell::new(None) };
}

let relative_path = path
.strip_prefix(directory_path)
.unwrap()
Expand All @@ -469,15 +479,24 @@ fn main() -> Result<()> {
.rule_config_provider
.config_for_file(relative_path.as_ref());
if let Ok(file_content) = fs::read_to_string(&path) {
let mut opt = JS_RUNTIME.replace(None);
let runtime_ref = opt.get_or_insert_with(|| {
v8.try_new_runtime().expect("ddsa init should succeed")
});

let file_content = Arc::from(file_content);
analyze(
let rule_result = analyze_with(
runtime_ref,
language,
&rules_for_language,
&relative_path,
&file_content,
&rule_config,
&analysis_options,
)
);
JS_RUNTIME.replace(opt);

rule_result
} else {
vec![]
}
Expand Down
23 changes: 23 additions & 0 deletions crates/bins/src/bin/datadog-static-analyzer-server.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
// Unless explicitly stated otherwise all files in this repository are licensed under the Apache License, Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024 Datadog, Inc.

use kernel::analysis::ddsa_lib::v8_platform::{initialize_v8, Initialized, V8Platform};
use std::sync::OnceLock;
mod datadog_static_analyzer_server;

pub(crate) static V8_PLATFORM: OnceLock<V8Platform<Initialized>> = OnceLock::new();
pub(crate) static RAYON_POOL: OnceLock<rayon::ThreadPool> = OnceLock::new();

/// Entry point of the static analyzer server binary.
///
/// Initializes v8, builds the Rayon thread pool, publishes both through the `V8_PLATFORM`
/// and `RAYON_POOL` statics, and then awaits the server's `start` future.
///
/// # Panics
///
/// Panics if either static has already been set, or if the Rayon pool cannot be built.
#[rocket::main]
async fn main() {
// NOTE: It's imperative that the Rayon pool (which will handle analysis jobs that use v8)
// is created by the same thread that initializes v8 (see the documentation
// on the `initialize_v8` function for more information).
// NOTE(review): the meaning of the `0` argument is defined by `initialize_v8`, which is
// not visible here -- confirm against its documentation.
let v8 = initialize_v8(0);
V8_PLATFORM.set(v8).expect("cell should have been unset");

// Keep this after `initialize_v8` and on the same thread (see the NOTE above).
// Per Rayon's documentation, `num_threads(0)` lets Rayon pick the thread count itself.
let rayon_pool = rayon::ThreadPoolBuilder::new()
.num_threads(0)
.build()
.expect("rayon pool should be buildable");
RAYON_POOL
.set(rayon_pool)
.expect("cell should have been unset");

datadog_static_analyzer_server::start().await;
}
16 changes: 12 additions & 4 deletions crates/bins/src/bin/datadog-static-analyzer-test-ruleset.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use cli::datadog_utils::get_ruleset;
use common::analysis_options::AnalysisOptions;
use kernel::analysis::analyze::analyze;
use kernel::analysis::analyze::analyze_with;
use kernel::model::rule::Rule;

use anyhow::{Error, Result};
use getopts::Options;
use kernel::analysis::ddsa_lib::v8_platform::initialize_v8;
use kernel::analysis::ddsa_lib::JsRuntime;
use kernel::model::rule_test::RuleTest;
use kernel::rule_config::RuleConfig;
use kernel::utils::decode_base64_string;
Expand All @@ -17,7 +19,7 @@ fn print_usage(program: &str, opts: Options) {
print!("{}", opts.usage(&brief));
}

fn test_rule(rule: &Rule, test: &RuleTest) -> Result<String> {
fn test_rule(runtime: &mut JsRuntime, rule: &Rule, test: &RuleTest) -> Result<String> {
let rule_internal = rule.to_rule_internal().unwrap();
let code = decode_base64_string(test.code_base64.to_string()).unwrap();
let code = Arc::from(code);
Expand All @@ -28,7 +30,8 @@ fn test_rule(rule: &Rule, test: &RuleTest) -> Result<String> {
timeout: None,
};
let rules = vec![rule_internal];
let analyze_result = analyze(
let analyze_result = analyze_with(
runtime,
&rule.language,
&rules,
&Arc::from(test.filename.clone()),
Expand Down Expand Up @@ -82,6 +85,11 @@ fn main() {
exit(1);
}

let v8 = initialize_v8(0);
let mut runtime = v8
.try_new_runtime()
.expect("runtime should have all data required to init");

let use_staging = matches.opt_present("s");
let rulesets = matches.opt_strs("r");
let mut num_failures = 0;
Expand All @@ -93,7 +101,7 @@ fn main() {
println!(" rule {} ... ", rule.name);
let c = rule.clone();
for t in rule.tests {
match test_rule(&c, &t) {
match test_rule(&mut runtime, &c, &t) {
Ok(_) => {
println!(" test {} passed", t.filename);
}
Expand Down
22 changes: 20 additions & 2 deletions crates/bins/src/bin/datadog-static-analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ use cli::utils::{choose_cpu_count, get_num_threads_to_use, print_configuration};
use cli::violations_table;
use common::analysis_options::AnalysisOptions;
use common::model::diff_aware::DiffAware;
use kernel::analysis::analyze::{analyze, generate_flow_graph_dot};
use kernel::analysis::analyze::{analyze_with, generate_flow_graph_dot};
use kernel::analysis::ddsa_lib::v8_platform::initialize_v8;
use kernel::analysis::ddsa_lib::JsRuntime;
use kernel::analysis::generated_content::DEFAULT_IGNORED_GLOBS;
use kernel::constants::{CARGO_VERSION, VERSION};
use kernel::model::analysis::ERROR_RULE_TIMEOUT;
Expand All @@ -44,6 +46,7 @@ use kernel::rule_config::RuleConfigProvider;
use secrets::model::secret_result::{SecretResult, SecretValidationStatus};
use secrets::scanner::{build_sds_scanner, find_secrets};
use secrets::secret_files::should_ignore_file_for_secret;
use std::cell::Cell;

fn print_usage(program: &str, opts: Options) {
let brief = format!("Usage: {} FILE [options]", program);
Expand Down Expand Up @@ -488,6 +491,8 @@ fn main() -> Result<()> {
);
}

let v8 = initialize_v8(num_threads as u32);

let mut number_of_rules_used = 0;
// Finally run the analysis
for language in &languages {
Expand Down Expand Up @@ -532,6 +537,11 @@ fn main() -> Result<()> {
.fold(
|| (AnalysisStatistics::new(), Vec::<RuleResult>::new()),
|(mut stats, mut fold_results), path| {
thread_local! {
// Per-thread slot for a `JsRuntime`, initially empty (`None`).
// The enclosing closure takes the value out with `replace(None)`, creates the runtime on
// first use via `get_or_insert_with`, and stores it back with `replace(opt)` after the
// file has been analyzed.
// (`Cell` is used to allow lazy instantiation of a thread local with zero runtime cost).
static JS_RUNTIME: Cell<Option<JsRuntime>> = const { Cell::new(None) };
}

let relative_path = path
.strip_prefix(directory_path)
.unwrap()
Expand All @@ -542,8 +552,14 @@ fn main() -> Result<()> {
.rule_config_provider
.config_for_file(relative_path.as_ref());
let res = if let Ok(file_content) = fs::read_to_string(&path) {
let mut opt = JS_RUNTIME.replace(None);
let runtime_ref = opt.get_or_insert_with(|| {
v8.try_new_runtime().expect("ddsa init should succeed")
});

let file_content = Arc::from(file_content);
let mut results = analyze(
let mut results = analyze_with(
runtime_ref,
language,
&rules_for_language,
&relative_path,
Expand Down Expand Up @@ -581,6 +597,7 @@ fn main() -> Result<()> {

if debug_java_dfa && *language == Language::Java {
if let Some(graph) = generate_flow_graph_dot(
runtime_ref,
*language,
&relative_path,
&file_content,
Expand All @@ -591,6 +608,7 @@ fn main() -> Result<()> {
let _ = fs::write(dot_path, graph);
}
}
JS_RUNTIME.replace(opt);

results
} else {
Expand Down
29 changes: 27 additions & 2 deletions crates/bins/src/bin/datadog_static_analyzer_server/endpoints.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::cell::Cell;
use std::path::Path;

use crate::datadog_static_analyzer_server::fairings::TraceSpan;
use crate::{RAYON_POOL, V8_PLATFORM};
use kernel::analysis::ddsa_lib::JsRuntime;
use rocket::{
fs::NamedFile,
futures::FutureExt,
Expand Down Expand Up @@ -105,11 +108,33 @@ fn languages(span: TraceSpan) -> Value {
json!(languages)
}

#[allow(unreachable_code)]
#[rocket::post("/analyze", format = "application/json", data = "<request>")]
fn analyze(span: TraceSpan, request: Json<AnalysisRequest>) -> Value {
async fn analyze(span: TraceSpan, request: Json<AnalysisRequest>) -> Value {
let _entered = span.enter();
tracing::debug!("{:?}", &request.0);
json!(process_analysis_request(request.into_inner()))

rocket::tokio::task::spawn_blocking(|| {
let pool = RAYON_POOL.get().expect("pool should have been created");
pool.scope_fifo(|_| {
thread_local! {
// Per-thread slot for a `JsRuntime`, initially empty (`None`).
// The enclosing closure takes the value out with `replace(None)`, creates the runtime on
// first use via `get_or_insert_with`, and stores it back with `replace(opt)` after the
// request has been processed.
// (`Cell` is used to allow lazy instantiation of a thread local with zero runtime cost).
static JS_RUNTIME: Cell<Option<JsRuntime>> = const { Cell::new(None) };
}
tracing::warn!("performing job on {:?}", std::thread::current().id());
let mut opt = JS_RUNTIME.replace(None);
let runtime_ref = opt.get_or_insert_with(|| {
let v8 = V8_PLATFORM.get().expect("v8 should have been initialized");
v8.try_new_runtime().expect("ddsa init should succeed")
});
let response = process_analysis_request(request.into_inner(), runtime_ref);
JS_RUNTIME.replace(opt);

json!(response)
})
})
.await
.unwrap()
}

#[rocket::post("/get-treesitter-ast", format = "application/json", data = "<request>")]
Expand Down
2 changes: 1 addition & 1 deletion crates/static-analysis-kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ indexmap = { workspace = true }
tree-sitter = { workspace = true }

# other
deno_core = "0.196.0"
deno_core = "0.321.0"
globset = "0.4.14"
graphviz-rust = "0.9.0"
sequence_trie = "0.3.6"
Expand Down
67 changes: 28 additions & 39 deletions crates/static-analysis-kernel/src/analysis/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,6 @@ use std::time::{Duration, Instant};
/// the time it takes for the JavaScript rule to execute.
const RULE_EXECUTION_TIMEOUT: Duration = Duration::from_millis(2000);

thread_local! {
/// A thread-local `JsRuntime`
pub static DEFAULT_JS_RUNTIME: std::cell::RefCell<JsRuntime> = {
let runtime = JsRuntime::try_new().expect("runtime should have all data required to init");
std::cell::RefCell::new(runtime)
};
}

/// Split the code and extract all the logic that reports to lines to ignore.
/// If a no-dd-sa statement occurs on the first line, it applies to the whole file.
/// Otherwise, it only applies to the line below.
Expand Down Expand Up @@ -137,36 +129,6 @@ fn get_lines_to_ignore(code: &str, language: &Language) -> LinesToIgnore {
}
}

// main function
// 1. Build the context (tree-sitter tree, etc)
// 2. Run the tree-sitter query and build the object that hold the match
// 3. Execute the rule
// 4. Collect results and errors
pub fn analyze<I>(
language: &Language,
rules: I,
filename: &Arc<str>,
code: &Arc<str>,
rule_config: &RuleConfig,
analysis_option: &AnalysisOptions,
) -> Vec<RuleResult>
where
I: IntoIterator,
I::Item: Borrow<RuleInternal>,
{
DEFAULT_JS_RUNTIME.with_borrow_mut(|runtime| {
analyze_with(
runtime,
language,
rules,
filename,
code,
rule_config,
analysis_option,
)
})
}

pub fn analyze_with<I>(
runtime: &mut JsRuntime,
language: &Language,
Expand Down Expand Up @@ -311,6 +273,7 @@ where
///
/// [DOT Language]: https://graphviz.org/doc/info/lang.html
pub fn generate_flow_graph_dot(
runtime: &mut JsRuntime,
language: Language,
file_name: &Arc<str>,
file_contents: &Arc<str>,
Expand Down Expand Up @@ -374,7 +337,8 @@ function visit(captures) {
tree_sitter_query,
};

let results = analyze(
let results = analyze_with(
runtime,
&language,
[rule],
file_name,
Expand Down Expand Up @@ -411,6 +375,7 @@ function visit(captures) {
mod tests {

use super::*;
use crate::analysis::ddsa_lib::test_utils::cfg_test_v8;
use crate::analysis::tree_sitter::get_query;
use crate::config_file::parse_config_file;
use crate::model::common::Language;
Expand All @@ -429,6 +394,28 @@ def foo(arg1):
pass
"#;

/// Test-only convenience wrapper around [`analyze_with`].
///
/// Creates a new runtime from the v8 platform returned by `cfg_test_v8` on every call, so
/// unit tests don't have to set one up by hand, then forwards all arguments unchanged.
fn analyze(
    language: &Language,
    rules: &[RuleInternal],
    filename: &Arc<str>,
    code: &Arc<str>,
    rule_config: &RuleConfig,
    analysis_option: &AnalysisOptions,
) -> Vec<RuleResult> {
    let mut rt = cfg_test_v8().new_runtime();
    analyze_with(&mut rt, language, rules, filename, code, rule_config, analysis_option)
}

// execution time must be more than 0
#[test]
fn test_execution_time() {
Expand Down Expand Up @@ -1331,7 +1318,9 @@ public class ClassB {
}
}
";
let mut runtime = cfg_test_v8().new_runtime();
let parsed_dot = generate_flow_graph_dot(
&mut runtime,
Language::Java,
&Arc::from("path/to/file.java"),
&Arc::from(file_contents),
Expand Down
Loading

0 comments on commit c0ad97d

Please sign in to comment.