From be4d0439378e468bc0c0fe8584bbb3e92a3498a6 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Mon, 12 Aug 2024 07:25:27 -0700 Subject: [PATCH 1/7] Demo: hackana for finding executable code --- Cargo.toml | 5 ++- src/cli/Cargo.toml | 1 + src/cli/lib.rs | 46 ++++++++++++++++++++++++++- src/executable_code_finder/Cargo.toml | 10 ++++++ src/executable_code_finder/lib.rs | 39 +++++++++++++++++++++++ 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 src/executable_code_finder/Cargo.toml create mode 100644 src/executable_code_finder/lib.rs diff --git a/Cargo.toml b/Cargo.toml index cff481b4..c53481ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ hakana-analyzer = { path = "src/analyzer" } hakana-str = { path = "src/str" } hakana-language-server = { path = "src/language_server" } hakana-workhorse = { path = "src/file_scanner_analyzer" } +executable-finder = { path = "src/executable_code_finder" } mimalloc = { version = "*", default-features = false } tower-lsp = { version = "=0.20.0", features = ["proposed"] } tokio = { version = "1.26.0", features = ["full"] } @@ -33,7 +34,9 @@ members = [ "src/file_scanner_analyzer", "src/language_server", "src/logger", - "src/js_interop" + "src/ttype", + "src/js_interop", + "src/executable_code_finder", ] exclude = ["third-party"] diff --git a/src/cli/Cargo.toml b/src/cli/Cargo.toml index faed154a..e062d8f0 100644 --- a/src/cli/Cargo.toml +++ b/src/cli/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] hakana-workhorse = { path = "../file_scanner_analyzer" } +executable-finder = { path = "../executable_code_finder" } hakana-analyzer = { path = "../analyzer" } hakana-logger = { path = "../logger" } hakana-code-info = { path = "../code_info" } diff --git a/src/cli/lib.rs b/src/cli/lib.rs index 4ae21651..b7ca37f2 100644 --- a/src/cli/lib.rs +++ b/src/cli/lib.rs @@ -17,7 +17,9 @@ use std::fs::{self, File}; use std::io::Write; use std::path::Path; use std::process::exit; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; +use hakana_reflection_info::code_location::FilePath; +use hakana_workhorse::file::VirtualFileSystem; use test_runners::test_runner::TestRunner; pub mod test_runners; @@ -436,6 +438,13 @@ pub fn init( .arg(arg!( "The test to run")) .arg_required_else_help(true), ) + .subcommand( + Command::new("find-executable") + .about("Finds all executable lines of code") + .arg(arg!(--"file" ).required(true).help( + "THe file path to process", + )) + ) .get_matches(); let cwd = (env::current_dir()).unwrap().to_str().unwrap().to_string(); @@ -650,6 +659,13 @@ pub fn init( random_seed, ); } + Some(("find-executable", sub_matches)) => { + do_find_executable( + sub_matches, + root_dir, + logger, + ); + } _ => unreachable!(), // If all subcommands are defined above, anything else is unreachable!() } @@ -718,6 +734,34 @@ fn do_fix( } } +fn do_find_executable( + sub_matches: &clap::ArgMatches, + root_dir: String, + _logger: Logger, +) { + let mut root_owned: String = root_dir.to_owned(); + let file = sub_matches + .value_of("file") + .unwrap(); + root_owned.push_str("/"); + root_owned.push_str(file); + + println!("{}", root_owned); + + let interner = Arc::new(Mutex::new(Interner::default())); + let mut threaded_interner = ThreadedInterner::new(interner.clone()); + let interned_file_path = FilePath(threaded_interner.intern(root_owned.clone())); + let mut file_system = VirtualFileSystem::default(); + + file_system + .file_hashes_and_times + .insert(interned_file_path, (0, 0)); + + let aast = hakana_workhorse::get_aast_for_path(interned_file_path, root_owned.as_str()); + + executable_finder::collect_executable_lines(&aast.unwrap().0); +} + fn do_remove_unused_fixmes( sub_matches: &clap::ArgMatches, root_dir: &String, diff --git a/src/executable_code_finder/Cargo.toml b/src/executable_code_finder/Cargo.toml new file mode 100644 index 00000000..b20034a9 --- /dev/null +++ b/src/executable_code_finder/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "executable-finder" +version = "0.1.0" +edition = "2021" + +[dependencies] +oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" } + +[lib] +path = "lib.rs" \ No newline at end of file diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs new file mode 100644 index 00000000..449f430d --- /dev/null +++ b/src/executable_code_finder/lib.rs @@ -0,0 +1,39 @@ +use oxidized::{ + aast, + aast_visitor::{visit, AstParams, Node, Visitor}, +}; + +struct Context { +} + +struct Scanner { + +} + +impl<'ast> Visitor<'ast> for Scanner { + type Params = AstParams; + + fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { + self + } + + fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ()> { + let result = p.recurse(c, self); + + println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); + + result + } +} + +pub fn collect_executable_lines( + program: &aast::Program<(), ()>, +) { + let mut checker = Scanner { + }; + + let mut context = Context { + }; + + visit(&mut checker, &mut context, program).unwrap(); +} From 0515b5815638dc16a7ecc82ceb9857e0dabb6ef1 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Wed, 14 Aug 2024 08:49:56 -0700 Subject: [PATCH 2/7] Recursively scan all files in the repo --- src/cli/lib.rs | 46 +++--- src/executable_code_finder/Cargo.toml | 7 + src/executable_code_finder/lib.rs | 224 +++++++++++++++++++++++--- 3 files changed, 230 insertions(+), 47 deletions(-) diff --git a/src/cli/lib.rs b/src/cli/lib.rs index b7ca37f2..61a8640a 100644 --- a/src/cli/lib.rs +++ b/src/cli/lib.rs @@ -17,9 +17,7 @@ use std::fs::{self, File}; use std::io::Write; use std::path::Path; use std::process::exit; -use std::sync::{Arc, Mutex}; -use hakana_reflection_info::code_location::FilePath; -use hakana_workhorse::file::VirtualFileSystem; +use std::sync::Arc; use test_runners::test_runner::TestRunner; pub mod test_runners; @@ -441,8 +439,8 @@ pub fn init( .subcommand( Command::new("find-executable") .about("Finds all executable lines of code") - .arg(arg!(--"file" ).required(true).help( - "THe file path to process", + .arg(arg!(--"root" ).required(false).help( + "The root directory that Hakana runs in. Defaults to the current directory", )) ) .get_matches(); @@ -662,7 +660,8 @@ pub fn init( Some(("find-executable", sub_matches)) => { do_find_executable( sub_matches, - root_dir, + &root_dir, + threads, logger, ); } @@ -736,30 +735,21 @@ fn do_fix( fn do_find_executable( sub_matches: &clap::ArgMatches, - root_dir: String, - _logger: Logger, + root_dir: &str, + threads: u8, + logger: Logger, ) { - let mut root_owned: String = root_dir.to_owned(); - let file = sub_matches - .value_of("file") - .unwrap(); - root_owned.push_str("/"); - root_owned.push_str(file); + let _output_file = sub_matches.value_of("output").map(|f| f.to_string()); + let _output_format = sub_matches.value_of("json-format").map(|f| f.to_string()); - println!("{}", root_owned); - - let interner = Arc::new(Mutex::new(Interner::default())); - let mut threaded_interner = ThreadedInterner::new(interner.clone()); - let interned_file_path = FilePath(threaded_interner.intern(root_owned.clone())); - let mut file_system = VirtualFileSystem::default(); - - file_system - .file_hashes_and_times - .insert(interned_file_path, (0, 0)); - - let aast = hakana_workhorse::get_aast_for_path(interned_file_path, root_owned.as_str()); - - executable_finder::collect_executable_lines(&aast.unwrap().0); + let config = config::Config::new(root_dir.to_string(), FxHashSet::default()); + let _ = executable_finder::scan_files( + &vec![root_dir.to_string()], + None, + &Arc::new(config), + threads, + Arc::new(logger), + ); } fn do_remove_unused_fixmes( diff --git a/src/executable_code_finder/Cargo.toml b/src/executable_code_finder/Cargo.toml index b20034a9..f202881d 100644 --- a/src/executable_code_finder/Cargo.toml +++ b/src/executable_code_finder/Cargo.toml @@ -4,7 +4,14 @@ version = "0.1.0" edition = "2021" [dependencies] +hakana-analyzer = { path = "../analyzer" } +hakana-logger = { path = "../logger" } +hakana-reflection-info = { path = "../code_info" } +hakana-str = { path = "../str" } +hakana-workhorse = { path = "../file_scanner_analyzer" } oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" } +indicatif = "0.17.0-rc.11" +rustc-hash = "1.1.0" [lib] path = "lib.rs" \ No newline at end of file diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs index 449f430d..9711d633 100644 --- a/src/executable_code_finder/lib.rs +++ b/src/executable_code_finder/lib.rs @@ -1,39 +1,225 @@ -use oxidized::{ - aast, - aast_visitor::{visit, AstParams, Node, Visitor}, -}; +use std::sync::{Arc, Mutex}; +use std::time::Instant; +use hakana_analyzer::config::Config; +use hakana_logger::Logger; +use hakana_reflection_info::code_location::FilePath; +use hakana_str::{Interner, ThreadedInterner}; +use hakana_workhorse::file::{VirtualFileSystem}; +use hakana_workhorse::scanner::{add_builtins_to_scan}; +use indicatif::{ProgressBar, ProgressStyle}; +use oxidized::{aast, aast_visitor::{visit, AstParams, Node, Visitor}}; +use rustc_hash::FxHashMap; +use hakana_reflection_info::file_info::ParserError; struct Context { } -struct Scanner { +pub fn scan_files( + scan_dirs: &Vec, + cache_dir: Option<&String>, + config: &Arc, + threads: u8, + logger: Arc, +) -> Result<(),()> { + logger.log_debug_sync(&format!("{:#?}", scan_dirs)); + + let mut files_to_scan = vec![]; + let mut files_to_analyze = vec![]; + let mut interner= Interner::default(); + let existing_file_system = None; + + get_filesystem( + &mut files_to_scan, + &mut interner, + &logger, + scan_dirs, + &existing_file_system, + config, + cache_dir, + &mut files_to_analyze, + ); + + let invalid_files = Arc::new(Mutex::new(vec![])); + + if !files_to_scan.is_empty() { + let file_scanning_now = Instant::now(); + + let bar = if logger.show_progress() { + let pb = ProgressBar::new(files_to_scan.len() as u64); + let sty = + ProgressStyle::with_template("{bar:40.green/yellow} {pos:>7}/{len:7}").unwrap(); + pb.set_style(sty); + Some(Arc::new(pb)) + } else { + None + }; + + let files_processed: Arc> = Arc::new(Mutex::new(0)); + + let mut group_size = threads as usize; + + let mut path_groups = FxHashMap::default(); + + if files_to_scan.len() < 4 * group_size { + group_size = 1; + } + + for (i, str_path) in files_to_scan.into_iter().enumerate() { + let group = i % group_size; + path_groups + .entry(group) + .or_insert_with(Vec::new) + .push(FilePath(interner.get(str_path.as_str()).unwrap())); + } + + let interner = Arc::new(Mutex::new(interner)); + let mut handles = vec![]; + + for (_, path_group) in path_groups { + let interner = interner.clone(); + let bar = bar.clone(); + let files_processed = files_processed.clone(); + let logger = logger.clone(); + let invalid_files = invalid_files.clone(); + + let handle = std::thread::spawn(move || { + let mut new_context = Context {}; + let new_interner = ThreadedInterner::new(interner); + for file_path in &path_group { + let str_path = new_interner + .parent + .lock() + .unwrap() + .lookup(&file_path.0) + .to_string(); + + println!("{}", str_path); + + match scan_file(&str_path, *file_path, &mut new_context, &logger.clone(), ) { + Err(_) => { + invalid_files.lock().unwrap().push(*file_path); + } + Ok(_) => {} + }; + + let mut tally = files_processed.lock().unwrap(); + *tally += 1; + + update_progressbar(*tally, bar.clone()); + } + + //resolved_names.lock().unwrap().extend(local_resolved_names); + + //let mut codebases = codebases.lock().unwrap(); + //codebases.push(new_codebase); + }); + + handles.push(handle); + } + + for handle in handles { + handle.join().unwrap(); + } + + if let Some(bar) = &bar { + bar.finish_and_clear(); + } + + if logger.can_log_timing() { + logger.log_sync(&format!( + "Scanning files took {:.2?}", + file_scanning_now.elapsed() + )); + } + } + + let _invalid_files = Arc::try_unwrap(invalid_files) + .unwrap() + .into_inner() + .unwrap(); + + Ok(()) } -impl<'ast> Visitor<'ast> for Scanner { - type Params = AstParams; +fn get_filesystem( + files_to_scan: &mut Vec, + interner: &mut Interner, + logger: &Logger, + scan_dirs: &Vec, + existing_file_system: &Option, + config: &Arc, + cache_dir: Option<&String>, + files_to_analyze: &mut Vec, +) -> VirtualFileSystem { + let mut file_system = VirtualFileSystem::default(); - fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { - self + add_builtins_to_scan(files_to_scan, interner, &mut file_system); + + logger.log_sync("Looking for Hack files"); + + for scan_dir in scan_dirs { + logger.log_debug_sync(&format!(" - in {}", scan_dir)); + + files_to_scan.extend(file_system.find_files_in_dir( + scan_dir, + interner, + existing_file_system, + config, + cache_dir.is_some() || config.ast_diff, + files_to_analyze, + )); } - fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ()> { - let result = p.recurse(c, self); + file_system +} - println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); - result +fn update_progressbar(percentage: u64, bar: Option>) { + if let Some(bar) = bar { + bar.set_position(percentage); } } -pub fn collect_executable_lines( - program: &aast::Program<(), ()>, -) { - let mut checker = Scanner { +pub(crate) fn scan_file( + str_path: &str, + file_path: FilePath, + context: &mut Context, + logger: &Logger, +) -> Result<(), ParserError>{ + logger.log_debug_sync(&format!("scanning {}", str_path)); + + let aast = hakana_workhorse::get_aast_for_path(file_path, str_path); + + let aast = match aast { + Ok(aast) => aast, + Err(err) => { + return Err(err); + } }; - let mut context = Context { + let mut checker = Scanner { }; - visit(&mut checker, &mut context, program).unwrap(); + visit(&mut checker, context, &aast.0) +} + +struct Scanner { + } + +impl<'ast> Visitor<'ast> for Scanner { + type Params = AstParams; + + fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { + self + } + + fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> { + let result = p.recurse(c, self); + + //println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); + + result + } +} \ No newline at end of file From 79bdea1a3e1962a92dce1eb3ddb8482a0a166614 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Wed, 14 Aug 2024 11:12:04 -0700 Subject: [PATCH 3/7] And write the result into a file as JSON --- src/cli/lib.rs | 34 +++++++++-- src/executable_code_finder/Cargo.toml | 1 + src/executable_code_finder/lib.rs | 82 ++++++++++++++++----------- 3 files changed, 79 insertions(+), 38 deletions(-) diff --git a/src/cli/lib.rs b/src/cli/lib.rs index 61a8640a..fb7888dd 100644 --- a/src/cli/lib.rs +++ b/src/cli/lib.rs @@ -442,6 +442,9 @@ pub fn init( .arg(arg!(--"root" ).required(false).help( "The root directory that Hakana runs in. Defaults to the current directory", )) + .arg(arg!(--"output" ).required(true).help( + "File to save output to" + )), ) .get_matches(); @@ -661,6 +664,7 @@ pub fn init( do_find_executable( sub_matches, &root_dir, + &cwd, threads, logger, ); @@ -736,20 +740,40 @@ fn do_fix( fn do_find_executable( sub_matches: &clap::ArgMatches, root_dir: &str, + cwd: &String, threads: u8, logger: Logger, ) { - let _output_file = sub_matches.value_of("output").map(|f| f.to_string()); - let _output_format = sub_matches.value_of("json-format").map(|f| f.to_string()); - + let output_file = sub_matches.value_of("output").unwrap().to_string(); let config = config::Config::new(root_dir.to_string(), FxHashSet::default()); - let _ = executable_finder::scan_files( + + match executable_finder::scan_files( &vec![root_dir.to_string()], None, &Arc::new(config), threads, Arc::new(logger), - ); + ) { + Ok(file_infos) => { + let output_path = if output_file.starts_with('/') { + output_file + } else { + format!("{}/{}", cwd, output_file) + }; + let mut out = fs::File::create(Path::new(&output_path)).unwrap(); + match write!(out, "{}", serde_json::to_string_pretty(&file_infos).unwrap()) { + Ok(_) => { + println!("Done") + } + Err(err) => { + println!("error: {}", err) + } + } + } + Err(_) => { + println!("error") + } + } } fn do_remove_unused_fixmes( diff --git a/src/executable_code_finder/Cargo.toml b/src/executable_code_finder/Cargo.toml index f202881d..3a31a7f1 100644 --- a/src/executable_code_finder/Cargo.toml +++ b/src/executable_code_finder/Cargo.toml @@ -12,6 +12,7 @@ hakana-workhorse = { path = "../file_scanner_analyzer" } oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" } indicatif = "0.17.0-rc.11" rustc-hash = "1.1.0" +serde = { version = "1.0", features = ["derive"] } [lib] path = "lib.rs" \ No newline at end of file diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs index 9711d633..b17ef393 100644 --- a/src/executable_code_finder/lib.rs +++ b/src/executable_code_finder/lib.rs @@ -10,8 +10,12 @@ use indicatif::{ProgressBar, ProgressStyle}; use oxidized::{aast, aast_visitor::{visit, AstParams, Node, Visitor}}; use rustc_hash::FxHashMap; use hakana_reflection_info::file_info::ParserError; +use serde::Serialize; -struct Context { +#[derive(Debug, Serialize)] +pub struct ExecutableLines { + pub path: String, + pub executable_lines: Vec, } pub fn scan_files( @@ -20,7 +24,7 @@ pub fn scan_files( config: &Arc, threads: u8, logger: Arc, -) -> Result<(),()> { +) -> Result,()> { logger.log_debug_sync(&format!("{:#?}", scan_dirs)); let mut files_to_scan = vec![]; @@ -40,6 +44,7 @@ pub fn scan_files( ); let invalid_files = Arc::new(Mutex::new(vec![])); + let executable_lines = Arc::new(Mutex::new(vec![])); if !files_to_scan.is_empty() { let file_scanning_now = Instant::now(); @@ -57,9 +62,7 @@ pub fn scan_files( let files_processed: Arc> = Arc::new(Mutex::new(0)); let mut group_size = threads as usize; - let mut path_groups = FxHashMap::default(); - if files_to_scan.len() < 4 * group_size { group_size = 1; } @@ -81,26 +84,23 @@ pub fn scan_files( let files_processed = files_processed.clone(); let logger = logger.clone(); let invalid_files = invalid_files.clone(); + let executable_lines = executable_lines.clone(); + let root_dir = config.root_dir.clone(); let handle = std::thread::spawn(move || { - let mut new_context = Context {}; let new_interner = ThreadedInterner::new(interner); for file_path in &path_group { - let str_path = new_interner - .parent - .lock() - .unwrap() - .lookup(&file_path.0) - .to_string(); - - println!("{}", str_path); - - match scan_file(&str_path, *file_path, &mut new_context, &logger.clone(), ) { + match scan_file(&new_interner, &root_dir, *file_path, &logger.clone(), ) { Err(_) => { invalid_files.lock().unwrap().push(*file_path); } - Ok(_) => {} + Ok(res) => { + let mut executable_lines = executable_lines.lock().unwrap(); + if !res.executable_lines.is_empty() { + executable_lines.push(res); + } + } }; let mut tally = files_processed.lock().unwrap(); @@ -108,11 +108,6 @@ pub fn scan_files( update_progressbar(*tally, bar.clone()); } - - //resolved_names.lock().unwrap().extend(local_resolved_names); - - //let mut codebases = codebases.lock().unwrap(); - //codebases.push(new_codebase); }); handles.push(handle); @@ -139,7 +134,7 @@ pub fn scan_files( .into_inner() .unwrap(); - Ok(()) + Ok(Arc::try_unwrap(executable_lines).unwrap().into_inner().unwrap()) } fn get_filesystem( @@ -182,14 +177,22 @@ fn update_progressbar(percentage: u64, bar: Option>) { } pub(crate) fn scan_file( - str_path: &str, + interner: &ThreadedInterner, + root_dir: &str, file_path: FilePath, - context: &mut Context, logger: &Logger, -) -> Result<(), ParserError>{ +) -> Result{ + let interner = interner + .parent + .lock() + .unwrap(); + let str_path = interner + .lookup(&file_path.0) + .to_string(); + logger.log_debug_sync(&format!("scanning {}", str_path)); - let aast = hakana_workhorse::get_aast_for_path(file_path, str_path); + let aast = hakana_workhorse::get_aast_for_path(file_path, &str_path); let aast = match aast { Ok(aast) => aast, @@ -201,25 +204,38 @@ pub(crate) fn scan_file( let mut checker = Scanner { }; - visit(&mut checker, context, &aast.0) + let mut context= Vec::new(); + match visit(&mut checker, &mut context, &aast.0) { + Ok(_) => { + Ok(ExecutableLines { + path: file_path.get_relative_path(&interner, root_dir), + executable_lines: context + }) + } + Err(err) => { + Err(err) + } + } + } struct Scanner { - } impl<'ast> Visitor<'ast> for Scanner { - type Params = AstParams; + type Params = AstParams, ParserError>; fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { self } - fn visit_stmt(&mut self, c: &mut Context, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> { + fn visit_stmt(&mut self, c: &mut Vec, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> { let result = p.recurse(c, self); - - //println!("{}-{}", p.0.to_raw_span().start.line(),p.0.to_raw_span().end.line()); - + let start = p.0.to_raw_span().start.line(); + let end = p.0.to_raw_span().end.line(); + if start != 0 && end != 0 { + c.push(format!("{}-{}", start, end)); + } result } } \ No newline at end of file From 70cae4fde4d64373993f023ccdab4cb20902054f Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 15 Aug 2024 13:50:26 -0700 Subject: [PATCH 4/7] Address comments and make the visitor a bit smarter --- src/executable_code_finder/lib.rs | 144 ++++++++++++++++++------------ 1 file changed, 87 insertions(+), 57 deletions(-) diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs index b17ef393..9f4daf3b 100644 --- a/src/executable_code_finder/lib.rs +++ b/src/executable_code_finder/lib.rs @@ -1,16 +1,18 @@ -use std::sync::{Arc, Mutex}; -use std::time::Instant; use hakana_analyzer::config::Config; use hakana_logger::Logger; use hakana_reflection_info::code_location::FilePath; +use hakana_reflection_info::file_info::ParserError; use hakana_str::{Interner, ThreadedInterner}; -use hakana_workhorse::file::{VirtualFileSystem}; -use hakana_workhorse::scanner::{add_builtins_to_scan}; +use hakana_workhorse::file::VirtualFileSystem; +use hakana_workhorse::scanner::add_builtins_to_scan; use indicatif::{ProgressBar, ProgressStyle}; +use oxidized::aast::Stmt_; +use oxidized::ast::Pos; use oxidized::{aast, aast_visitor::{visit, AstParams, Node, Visitor}}; use rustc_hash::FxHashMap; -use hakana_reflection_info::file_info::ParserError; use serde::Serialize; +use std::sync::{Arc, Mutex}; +use std::time::Instant; #[derive(Debug, Serialize)] pub struct ExecutableLines { @@ -24,12 +26,12 @@ pub fn scan_files( config: &Arc, threads: u8, logger: Arc, -) -> Result,()> { +) -> Result, ()> { logger.log_debug_sync(&format!("{:#?}", scan_dirs)); let mut files_to_scan = vec![]; let mut files_to_analyze = vec![]; - let mut interner= Interner::default(); + let mut interner = Interner::default(); let existing_file_system = None; get_filesystem( @@ -43,7 +45,6 @@ pub fn scan_files( &mut files_to_analyze, ); - let invalid_files = Arc::new(Mutex::new(vec![])); let executable_lines = Arc::new(Mutex::new(vec![])); if !files_to_scan.is_empty() { @@ -83,7 +84,6 @@ pub fn scan_files( let bar = bar.clone(); let files_processed = files_processed.clone(); let logger = logger.clone(); - let invalid_files = invalid_files.clone(); let executable_lines = executable_lines.clone(); let root_dir = config.root_dir.clone(); @@ -91,21 +91,13 @@ pub fn scan_files( let new_interner = ThreadedInterner::new(interner); for file_path in &path_group { - match scan_file(&new_interner, &root_dir, *file_path, &logger.clone(), ) { - Err(_) => { - invalid_files.lock().unwrap().push(*file_path); - } - Ok(res) => { - let mut executable_lines = executable_lines.lock().unwrap(); - if !res.executable_lines.is_empty() { - executable_lines.push(res); - } - } - }; - + let res = scan_file(&new_interner, &root_dir, *file_path, &logger.clone()); + let mut executable_lines = executable_lines.lock().unwrap(); + if !res.executable_lines.is_empty() { + executable_lines.push(res); + } let mut tally = files_processed.lock().unwrap(); *tally += 1; - update_progressbar(*tally, bar.clone()); } }); @@ -129,11 +121,6 @@ pub fn scan_files( } } - let _invalid_files = Arc::try_unwrap(invalid_files) - .unwrap() - .into_inner() - .unwrap(); - Ok(Arc::try_unwrap(executable_lines).unwrap().into_inner().unwrap()) } @@ -181,7 +168,7 @@ pub(crate) fn scan_file( root_dir: &str, file_path: FilePath, logger: &Logger, -) -> Result{ +) -> ExecutableLines { let interner = interner .parent .lock() @@ -191,51 +178,94 @@ pub(crate) fn scan_file( .to_string(); logger.log_debug_sync(&format!("scanning {}", str_path)); - let aast = hakana_workhorse::get_aast_for_path(file_path, &str_path); - let aast = match aast { Ok(aast) => aast, - Err(err) => { - return Err(err); - } + Err(_) => panic!("invalid file: {}", str_path) }; - - let mut checker = Scanner { - }; - - let mut context= Vec::new(); + let mut checker = Scanner {}; + let mut context = Vec::new(); match visit(&mut checker, &mut context, &aast.0) { - Ok(_) => { - Ok(ExecutableLines { - path: file_path.get_relative_path(&interner, root_dir), - executable_lines: context - }) - } - Err(err) => { - Err(err) - } + Ok(_) => ExecutableLines { + path: file_path.get_relative_path(&interner, root_dir), + executable_lines: context, + }, + Err(_) => panic!("invalid file: {}", str_path) } - } -struct Scanner { -} +struct Scanner {} impl<'ast> Visitor<'ast> for Scanner { type Params = AstParams, ParserError>; - fn object(&mut self) -> &mut dyn Visitor<'ast, Params = Self::Params> { + fn object(&mut self) -> &mut dyn Visitor<'ast, Params=Self::Params> { self } fn visit_stmt(&mut self, c: &mut Vec, p: &aast::Stmt<(), ()>) -> Result<(), ParserError> { - let result = p.recurse(c, self); - let start = p.0.to_raw_span().start.line(); - let end = p.0.to_raw_span().end.line(); - if start != 0 && end != 0 { - c.push(format!("{}-{}", start, end)); + match &p.1 { + Stmt_::For(boxed) => { + push_start(&p.0, c); // The line where for loop is declared is coverable + boxed.1.recurse(c, self) + } + Stmt_::Foreach(boxed) => { + push_start(&p.0, c); // The line where foreach loop is declared is coverable + boxed.2.recurse(c, self) + } + Stmt_::Do(boxed) => { + push_pos(&boxed.1.1, c); + boxed.0.recurse(c, self) + } + Stmt_::While(boxed) => { + push_pos(&boxed.0.1, c); + boxed.1.recurse(c, self) + } + Stmt_::If(boxed) => { + push_pos(&boxed.0.1, c); // if expression + boxed.1.recurse(c, self)?; + boxed.2.recurse(c, self) + } + Stmt_::Switch(boxed) => { + // Skipping the switch statement, it's never covered by HHVM + for case_stmt in &boxed.1 { + push_pos(&case_stmt.0.1, c); + case_stmt.recurse(c, self)?; + } + boxed.2.recurse(c, self) + } + Stmt_::Block(boxed) => { + boxed.recurse(c, self) + } + Stmt_::Expr(boxed) => { + let start = boxed.1.to_raw_span().start.line(); + let end = boxed.1.to_raw_span().end.line(); + if start == end { + c.push(format!("{}-{}", start, end)); + } else { + // Multi-line expressions seem to miss the first line in HHVM coverage + c.push(format!("{}-{}", start + 1, end)); + } + Ok(()) + } + _ => { + let result = p.recurse(c, self); + push_pos(&p.0, c); + result + } } - result + } +} + +fn push_start(p: &Pos, res: &mut Vec) { + let start = p.to_raw_span().start.line(); + res.push(format!("{}-{}", start, start)); +} + +fn push_pos(p: &Pos, res: &mut Vec) { + let start = p.to_raw_span().start.line(); + let end = p.to_raw_span().end.line(); + if start != 0 && end != 0 { + res.push(format!("{}-{}", start, end)); } } \ No newline at end of file From 1bd01fb724b3a9dee066604e1bb51bc9645605d4 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 15 Aug 2024 13:52:53 -0700 Subject: [PATCH 5/7] poke ci From e4c6ca2659e530efa5741540d38021e60383a05e Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Mon, 19 Aug 2024 15:01:43 -0700 Subject: [PATCH 6/7] rebase --- Cargo.toml | 1 - src/executable_code_finder/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c53481ee..d011f527 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,6 @@ members = [ "src/file_scanner_analyzer", "src/language_server", "src/logger", - "src/ttype", "src/js_interop", "src/executable_code_finder", ] diff --git a/src/executable_code_finder/Cargo.toml b/src/executable_code_finder/Cargo.toml index 3a31a7f1..b28f141b 100644 --- a/src/executable_code_finder/Cargo.toml +++ b/src/executable_code_finder/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] hakana-analyzer = { path = "../analyzer" } hakana-logger = { path = "../logger" } -hakana-reflection-info = { path = "../code_info" } +hakana-code-info = { path = "../code_info" } hakana-str = { path = "../str" } hakana-workhorse = { path = "../file_scanner_analyzer" } oxidized = { path = "../../third-party/hhvm/hphp/hack/src/oxidized" } From 74092a40c29f9e168650a272463fbf4a344a7244 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Mon, 19 Aug 2024 15:10:39 -0700 Subject: [PATCH 7/7] rebase 2 --- src/executable_code_finder/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/executable_code_finder/lib.rs b/src/executable_code_finder/lib.rs index 9f4daf3b..2ec510d3 100644 --- a/src/executable_code_finder/lib.rs +++ b/src/executable_code_finder/lib.rs @@ -1,7 +1,7 @@ use hakana_analyzer::config::Config; use hakana_logger::Logger; -use hakana_reflection_info::code_location::FilePath; -use hakana_reflection_info::file_info::ParserError; +use hakana_code_info::code_location::FilePath; +use hakana_code_info::file_info::ParserError; use hakana_str::{Interner, ThreadedInterner}; use hakana_workhorse::file::VirtualFileSystem; use hakana_workhorse::scanner::add_builtins_to_scan;