diff --git a/crates/cli/src/analytics.rs b/crates/cli/src/analytics.rs index e2460a057..c45456186 100644 --- a/crates/cli/src/analytics.rs +++ b/crates/cli/src/analytics.rs @@ -3,7 +3,7 @@ use anyhow::Result; use clap::Args; use lazy_static::lazy_static; use marzano_gritmodule::fetcher::LocalRepo; -use marzano_gritmodule::{fetcher::ModuleRepo}; +use marzano_gritmodule::fetcher::ModuleRepo; use marzano_messenger::emit::ApplyDetails; use serde::{Deserialize, Serialize}; use std::{env, fmt, time::Duration}; diff --git a/crates/cli/src/analyze.rs b/crates/cli/src/analyze.rs index e577b3147..6f83c96c9 100644 --- a/crates/cli/src/analyze.rs +++ b/crates/cli/src/analyze.rs @@ -323,7 +323,7 @@ where #[cfg(feature = "grit_tracing")] task_span.set_parent(grouped_ctx); task_span.in_scope(|| { - compiled.execute_paths_streaming(&found_paths, context, tx, cache_ref); + compiled.execute_paths_streaming(found_paths, context, tx, cache_ref); loop { if processed.load(Ordering::SeqCst) >= found_count.try_into().unwrap() || !should_continue.load(Ordering::SeqCst) diff --git a/crates/cli/src/commands/check.rs b/crates/cli/src/commands/check.rs index 64e65e178..03d55047f 100644 --- a/crates/cli/src/commands/check.rs +++ b/crates/cli/src/commands/check.rs @@ -194,7 +194,7 @@ pub(crate) async fn run_check( !cache.has_no_matches(hash, pattern.hash) }) .collect(); - let (result, no_match) = pattern.execute_paths(&un_cached_input_files, &context); + let (result, no_match) = pattern.execute_paths(un_cached_input_files, &context); if !no_match.is_empty() { for path in no_match.into_iter() { let hash = path.hash.unwrap(); diff --git a/crates/cli/src/community.rs b/crates/cli/src/community.rs index 99904ca77..7d3106de9 100644 --- a/crates/cli/src/community.rs +++ b/crates/cli/src/community.rs @@ -2,7 +2,6 @@ use anyhow::Result; use grit_util::{FileRange, Position, RangeWithoutByte}; use serde::Deserialize; - use std::path::PathBuf; #[derive(Debug, Deserialize)] diff --git a/crates/cli_bin/tests/apply.rs b/crates/cli_bin/tests/apply.rs index 901cbc2d3..81a1172bd 100644 --- a/crates/cli_bin/tests/apply.rs +++ b/crates/cli_bin/tests/apply.rs @@ -783,6 +783,8 @@ fn test_absolute_path() -> Result<()> { let file = dir.join("dir2/unique.js"); let content = std::fs::read_to_string(file)?; + println!("content: {:?}", content); + // Verify it contains dir2/unique.js assert!(content.contains("dir2/unique.js")); diff --git a/crates/core/src/api.rs b/crates/core/src/api.rs index 3d648d444..30ace9249 100644 --- a/crates/core/src/api.rs +++ b/crates/core/src/api.rs @@ -35,6 +35,16 @@ pub enum MatchResult { AnalysisLog(AnalysisLog), } +impl MatchResult { + pub fn is_match(&self) -> bool { + is_match(self) + } + + pub fn is_error(&self) -> bool { + matches!(self, MatchResult::AnalysisLog(log) if log.level < 400) + } +} + /// Make a path look the way provolone expects it to /// Removes leading "./", or the root path if it's provided fn normalize_path_in_project<'a>(path: &'a str, root_path: Option<&'a PathBuf>) -> &'a str { @@ -591,6 +601,19 @@ impl AnalysisLog { source: None, } } + + pub(crate) fn floating_error(message: String) -> Self { + Self { + level: 280, + message, + position: Position::first(), + file: "".to_string(), + engine_id: "marzano".to_string(), + range: None, + syntax_tree: None, + source: None, + } + } } impl From for AnalysisLog { diff --git a/crates/core/src/built_in_functions.rs b/crates/core/src/built_in_functions.rs index 3163121f9..981a6c07f 100644 --- a/crates/core/src/built_in_functions.rs +++ 
b/crates/core/src/built_in_functions.rs @@ -173,6 +173,7 @@ fn resolve_path_fn<'a>( let args = MarzanoResolvedPattern::from_patterns(args, state, context, logs)?; let current_file = get_absolute_file_name(state, context.language())?; + let target_path = match &args[0] { Some(resolved_pattern) => resolved_pattern.text(&state.files, context.language())?, None => return Err(anyhow!("No path argument provided for resolve function")), diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 8b2d7a1bd..195b0e0c9 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -11,6 +11,7 @@ mod equivalence; mod foreign_function_definition; pub mod fs; mod inline_snippets; +mod limits; pub mod marzano_binding; pub mod marzano_code_snippet; pub mod marzano_context; @@ -35,3 +36,5 @@ use getrandom as _; #[cfg(test)] mod test; +#[cfg(test)] +mod test_files; diff --git a/crates/core/src/limits.rs b/crates/core/src/limits.rs new file mode 100644 index 000000000..deb7b351d --- /dev/null +++ b/crates/core/src/limits.rs @@ -0,0 +1,22 @@ +use grit_util::{AnalysisLog, Position}; +use marzano_util::rich_path::RichFile; + +use crate::constants::MAX_FILE_SIZE; + +pub(crate) fn is_file_too_big(file: &RichFile) -> Option { + if file.path.len() > MAX_FILE_SIZE || file.content.len() > MAX_FILE_SIZE { + Some(AnalysisLog { + // TODO: standardize levels + level: Some(310), + message: format!("Skipped {}, it is too big.", file.path), + file: Some(file.path.to_owned().into()), + engine_id: Some("marzano".to_owned()), + position: Some(Position::first()), + syntax_tree: None, + range: None, + source: None, + }) + } else { + None + } +} diff --git a/crates/core/src/marzano_context.rs b/crates/core/src/marzano_context.rs index 18de2f18b..39771e730 100644 --- a/crates/core/src/marzano_context.rs +++ b/crates/core/src/marzano_context.rs @@ -2,7 +2,8 @@ use crate::{ built_in_functions::BuiltIns, clean::{get_replacement_ranges, replace_cleaned_ranges}, foreign_function_definition::ForeignFunctionDefinition, - marzano_resolved_pattern::MarzanoResolvedPattern, + limits::is_file_too_big, + marzano_resolved_pattern::{MarzanoFile, MarzanoResolvedPattern}, pattern_compiler::file_owner_compiler::FileOwnerCompiler, problem::MarzanoQueryContext, text_unparser::apply_effects, @@ -24,14 +25,18 @@ use marzano_language::{ language::{MarzanoLanguage, Tree}, target_language::TargetLanguage, }; -use marzano_util::runtime::ExecutionContext; -use std::path::PathBuf; +use marzano_util::{ + rich_path::{LoadableFile, RichFile}, + runtime::ExecutionContext, +}; +use std::{borrow::Cow, path::PathBuf}; pub struct MarzanoContext<'a> { pub pattern_definitions: &'a Vec>, pub predicate_definitions: &'a Vec>, pub function_definitions: &'a Vec>, pub foreign_function_definitions: &'a Vec, + lazy_files: Vec>, pub files: &'a FileOwners, pub built_ins: &'a BuiltIns, pub language: &'a TargetLanguage, @@ -46,6 +51,7 @@ impl<'a> MarzanoContext<'a> { predicate_definitions: &'a Vec>, function_definitions: &'a Vec>, foreign_function_definitions: &'a Vec, + lazy_files: Vec>, files: &'a FileOwners, built_ins: &'a BuiltIns, language: &'a TargetLanguage, @@ -57,6 +63,7 @@ impl<'a> MarzanoContext<'a> { predicate_definitions, function_definitions, foreign_function_definitions, + lazy_files, files, built_ins, language, @@ -112,6 +119,48 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> { self.built_ins.call(call, context, state, logs) } + fn load_file( + &self, + file: &MarzanoFile<'a>, + state: &mut State<'a, MarzanoQueryContext>, + 
logs: &mut AnalysisLogs, + ) -> anyhow::Result { + match file { + MarzanoFile::Resolved(_) => { + // Assume the file is already loaded + } + MarzanoFile::Ptr(ptr) => { + if state.files.is_loaded(ptr) { + return Ok(true); + } + let index = ptr.file; + + let cow: Cow = self.lazy_files[index as usize].try_into_cow()?; + + if let Some(log) = is_file_too_big(&cow) { + logs.push(log); + return Ok(false); + } + + let owned = cow.into_owned(); + + let file = FileOwnerCompiler::from_matches( + owned.path, + owned.content, + None, + false, + self.language, + logs, + )?; + if let Some(file) = file { + self.files.push(file); + state.files.load_file(ptr, self.files.last().unwrap()); + } + } + } + Ok(true) + } + // FIXME: Don't depend on Grit's file handling in context. fn files(&self) -> &FileOwners { self.files @@ -130,7 +179,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> { ) -> Result { let mut parser = self.language().get_parser(); - let files = if let Some(files) = binding.get_file_pointers() { + let mut files = if let Some(files) = binding.get_file_pointers() { files .iter() .map(|f| state.files.latest_revision(f)) @@ -142,6 +191,11 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> { let binding = if files.len() == 1 { ResolvedPattern::from_file_pointer(*files.last().unwrap()) } else { + // Load all files into memory and collect successful file pointers + files.retain(|file_ptr| { + self.load_file(&MarzanoFile::Ptr(*file_ptr), state, logs) + .unwrap_or(false) + }); ResolvedPattern::from_files(ResolvedPattern::from_list_parts( files.iter().map(|f| ResolvedPattern::from_file_pointer(*f)), )) @@ -160,7 +214,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> { suppressed, }; for file_ptr in files { - let file = state.files.get_file(file_ptr); + let file = state.files.get_file_owner(file_ptr); let mut match_log = file.matches.borrow_mut(); let filename_path = &file.name; diff --git a/crates/core/src/marzano_resolved_pattern.rs b/crates/core/src/marzano_resolved_pattern.rs index be102d04d..c37d513d0 100644 --- a/crates/core/src/marzano_resolved_pattern.rs +++ b/crates/core/src/marzano_resolved_pattern.rs @@ -848,7 +848,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> { fn name(&self, files: &FileRegistry<'a, MarzanoQueryContext>) -> MarzanoResolvedPattern<'a> { match self { Self::Resolved(resolved) => resolved.name.clone(), - Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(&files.get_file(*ptr).name), + Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(files.get_file_name(*ptr)), } } @@ -866,7 +866,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> { ))) } Self::Ptr(ptr) => Ok(ResolvedPattern::from_path_binding( - &files.get_file(*ptr).absolute_path, + files.get_absolute_path(*ptr)?, )), } } @@ -875,7 +875,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> { match self { Self::Resolved(resolved) => resolved.body.clone(), Self::Ptr(ptr) => { - let file = &files.get_file(*ptr); + let file = &files.get_file_owner(*ptr); let root = file.tree.root_node(); let range = root.byte_range(); ResolvedPattern::from_range_binding(range, &file.tree.source) @@ -887,7 +887,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> { match self { Self::Resolved(resolved) => resolved.body.clone(), Self::Ptr(ptr) => { - let file = &files.get_file(*ptr); + let file = &files.get_file_owner(*ptr); ResolvedPattern::from_node_binding(file.tree.root_node()) } } diff --git 
a/crates/core/src/problem.rs b/crates/core/src/problem.rs index fef670fae..8cbd26ed2 100644 --- a/crates/core/src/problem.rs +++ b/crates/core/src/problem.rs @@ -2,25 +2,24 @@ use crate::{ api::{is_match, AnalysisLog, DoneFile, MatchResult}, ast_node::{ASTNode, AstLeafNode}, built_in_functions::BuiltIns, - constants::MAX_FILE_SIZE, foreign_function_definition::ForeignFunctionDefinition, marzano_binding::MarzanoBinding, marzano_code_snippet::MarzanoCodeSnippet, marzano_context::MarzanoContext, marzano_resolved_pattern::{MarzanoFile, MarzanoResolvedPattern}, - pattern_compiler::{compiler::VariableLocations, file_owner_compiler::FileOwnerCompiler}, + pattern_compiler::compiler::VariableLocations, }; -use anyhow::{bail, Result}; +use anyhow::Result; use grit_pattern_matcher::{ constants::{GLOBAL_VARS_SCOPE_INDEX, NEW_FILES_INDEX}, context::QueryContext, - file_owners::{FileOwner, FileOwners}, + file_owners::FileOwners, pattern::{ - FilePtr, GritFunctionDefinition, Matcher, Pattern, PatternDefinition, PredicateDefinition, - ResolvedPattern, State, VariableContent, + FilePtr, FileRegistry, GritFunctionDefinition, Matcher, Pattern, PatternDefinition, + PredicateDefinition, ResolvedPattern, State, VariableContent, }, }; -use grit_util::{Position, VariableMatch}; +use grit_util::VariableMatch; use im::vector; use log::error; use marzano_language::{language::Tree, target_language::TargetLanguage}; @@ -28,17 +27,19 @@ use marzano_util::{ cache::{GritCache, NullCache}, hasher::hash, node_with_source::NodeWithSource, - rich_path::{FileName, RichFile, RichPath, TryIntoInputFile}, + rich_path::{LoadableFile, RichFile, RichPath}, runtime::ExecutionContext, }; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use rayon::iter::IntoParallelIterator; +use rayon::iter::ParallelIterator; use sha2::{Digest, Sha256}; -use std::fmt::Debug; + use std::{ - borrow::Cow, + collections::HashMap, path::PathBuf, sync::mpsc::{self, Sender}, }; +use std::{fmt::Debug, str::FromStr}; use tracing::{event, Level}; #[derive(Debug)] @@ -92,13 +93,6 @@ impl From for MarzanoResolvedPattern<'_> { } } -struct FilePatternOutput { - file_pattern: Option, - file_owners: FileOwners, - done_files: Vec, - error_files: Vec, -} - fn send(tx: &Sender>, value: Vec) { if let Err(err) = tx.send(value) { error!("Failed to emit execution result: {}", err); @@ -153,135 +147,116 @@ impl Problem { } } - fn build_resolved_pattern( + fn build_and_execute_resolved_pattern( &self, - files: &[impl TryIntoInputFile + FileName], + tx: &Sender>, + files: Vec, + context: &ExecutionContext, cache: &impl GritCache, - ) -> Result { + ) { let owned_files = FileOwners::new(); - let mut results = vec![]; - let mut file_pointers = vec![]; - let mut done_files = vec![]; if !self.is_multifile && files.len() != 1 { - bail!("Cannot build resolved pattern for single file pattern with more than one file") + let results = vec![MatchResult::AnalysisLog(AnalysisLog::floating_error( + "Cannot build resolved pattern for single file pattern with more than one file" + .to_string(), + ))]; + send(tx, results); } - for file in files { - let file: Cow = match file.try_into_cow() { - Result::Ok(file) => file, - Result::Err(err) => { - results.push(MatchResult::AnalysisLog(AnalysisLog::new_error( - err.to_string(), - &file.name(), - ))); - continue; - } - }; - if let Some(log) = is_file_too_big(&file) { - results.push(MatchResult::AnalysisLog(log)); - results.push(MatchResult::DoneFile(DoneFile { - relative_file_path: file.path.to_string(), - // Don't know if there are 
results, so we can't cache - ..Default::default() - })) - } else { - let file_hash = hash(&file.path); - if cache.has_no_matches(file_hash, self.hash) { - results.push(MatchResult::DoneFile(DoneFile { - relative_file_path: file.path.to_string(), + let mut file_pointers: Vec = Vec::new(); + + let mut done_files: HashMap = HashMap::new(); + + for (index, file) in files.iter().enumerate() { + let path = file.name(); + let file_hash = hash(&path); + if cache.has_no_matches(file_hash, self.hash) { + done_files.insert( + path.clone(), + DoneFile { + relative_file_path: path, has_results: Some(false), file_hash: Some(file_hash), from_cache: true, - })); - } else { - let mut logs = vec![].into(); - let owned_file = FileOwnerCompiler::from_matches( - file.path.to_owned(), - file.content.to_owned(), - None, - false, - &self.language, - &mut logs, - ); - results.extend( - logs.logs() - .into_iter() - .map(|l| MatchResult::AnalysisLog(l.into())), - ); - match owned_file { - Result::Ok(owned_file) => { - if let Some(owned_file) = owned_file { - file_pointers.push(FilePtr::new(file_pointers.len() as u16, 0)); - owned_files.push(owned_file); - } - done_files.push(MatchResult::DoneFile(DoneFile { - relative_file_path: file.path.to_string(), - has_results: None, - file_hash: Some(file_hash), - from_cache: false, - })) - } - Result::Err(err) => { - results.push(MatchResult::AnalysisLog(AnalysisLog::new_error( - err.to_string(), - &file.path, - ))); - results.push(MatchResult::DoneFile(DoneFile { - relative_file_path: file.path.to_string(), - ..Default::default() - })) - } - } - } + }, + ); + } else { + done_files.insert( + path.clone(), + DoneFile { + relative_file_path: path, + file_hash: Some(file_hash), + ..Default::default() + }, + ); + file_pointers.push(FilePtr::new(index as u16, 0)); } } - let binding = if self.is_multifile { + + let binding: FilePattern = if self.is_multifile { file_pointers.into() } else if file_pointers.is_empty() { - // single file pattern had file that was too big - return Ok(FilePatternOutput { - file_pattern: None, - file_owners: owned_files, - done_files, - error_files: results, - }); + // we somehow arrived here with no files, so we return Ok + return; } else { file_pointers[0].into() }; - Ok(FilePatternOutput { - file_pattern: Some(binding), - file_owners: owned_files, - done_files, - error_files: results, - }) + + self.execute_and_send(tx, files, binding, &owned_files, context, done_files); } fn execute_and_send( &self, tx: &Sender>, - files: &[impl TryIntoInputFile + FileName], + files: Vec, binding: FilePattern, owned_files: &FileOwners, context: &ExecutionContext, - mut done_files: Vec, + mut done_files: HashMap, ) { - let mut outputs = match self.execute(binding, owned_files, context) { - Result::Err(err) => files - .iter() - .map(|file| { - MatchResult::AnalysisLog(AnalysisLog::new_error(err.to_string(), &file.name())) - }) - .collect(), - Result::Ok(messages) => messages, - }; - if done_files.len() == 1 { - if let MatchResult::DoneFile(ref mut done_file) = done_files[0] { - let has_results = outputs + let file_names: Vec = files + .iter() + .map(|f| PathBuf::from_str(&f.name()).unwrap()) + .collect(); + let borrowed_names: Vec<&PathBuf> = file_names.iter().collect(); + let lazy_files: Vec> = files + .into_iter() + .map(|file| Box::new(file) as Box) + .collect(); + + let mut outputs = + match self.execute(binding, lazy_files, borrowed_names, owned_files, context) { + Result::Err(err) => file_names .iter() - .any(|m| is_match(m) || matches!(m, 
MatchResult::AnalysisLog(_))); - done_file.has_results = Some(has_results); + .map(|file| { + MatchResult::AnalysisLog(AnalysisLog::new_error( + err.to_string(), + &file.to_string_lossy(), + )) + }) + .collect(), + Result::Ok(messages) => { + // For each message, mark the DoneFile as having results + for message in &messages { + if !is_match(message) { + continue; + } + if let Some(name) = message.file_name() { + if let Ok(path) = PathBuf::from_str(name) { + if let Some(done_file) = + done_files.get_mut(path.to_string_lossy().as_ref()) + { + done_file.has_results = Some(true); + } + } + } + } + + messages + } }; - } - outputs.extend(done_files); + + outputs.extend(done_files.into_values().map(MatchResult::DoneFile)); + if self.is_multifile { // to keep snapshot tests happy, not ideal; outputs.sort(); @@ -289,53 +264,9 @@ impl Problem { send(tx, outputs); } - fn build_and_execute_resolved_pattern( - &self, - tx: &Sender>, - files: &[impl TryIntoInputFile + FileName], - context: &ExecutionContext, - cache: &impl GritCache, - ) { - match self.build_resolved_pattern(files, cache) { - Result::Ok(FilePatternOutput { - file_pattern, - file_owners, - done_files, - error_files, - }) => { - send(tx, error_files); - if let Some(file_pattern) = file_pattern { - self.execute_and_send( - tx, - files, - file_pattern, - &file_owners, - context, - done_files, - ); - } - } - Result::Err(err) => { - // might be sending too many donefile here? - let mut error_files = vec![]; - for file in files { - error_files.push(MatchResult::AnalysisLog(AnalysisLog::new_error( - err.to_string(), - &file.name(), - ))); - error_files.push(MatchResult::DoneFile(DoneFile { - relative_file_path: file.name().to_string(), - ..Default::default() - })) - } - send(tx, error_files); - } - } - } - pub fn execute_files( &self, - files: &[RichFile], + files: Vec, context: &ExecutionContext, ) -> Vec { let mut results = vec![]; @@ -351,7 +282,7 @@ impl Problem { pub fn execute_paths<'a>( &self, - files: &[&'a RichPath], + files: Vec<&'a RichPath>, context: &ExecutionContext, ) -> (Vec, Vec<&'a RichPath>) { let mut results = vec![]; @@ -386,7 +317,8 @@ impl Problem { pub fn execute_file(&self, file: &RichFile, context: &ExecutionContext) -> Vec { let mut results = vec![]; let (tx, rx) = mpsc::channel::>(); - self.execute_shared(std::array::from_ref(file), context, tx, &NullCache::new()); + let files = vec![file]; + self.execute_shared(files, context, tx, &NullCache::new()); for r in rx.iter() { results.extend(r) } @@ -394,18 +326,9 @@ impl Problem { results } - pub fn execute_files_streaming( - &self, - files: &[RichFile], - context: &ExecutionContext, - tx: Sender>, - ) { - self.execute_shared(files, context, tx, &NullCache::new()) - } - pub fn execute_paths_streaming( &self, - files: &[PathBuf], + files: Vec, context: &ExecutionContext, tx: Sender>, cache: &impl GritCache, @@ -414,9 +337,9 @@ impl Problem { } #[cfg_attr(feature = "grit_tracing", instrument(skip_all))] - fn execute_shared( + pub(crate) fn execute_shared( &self, - files: &[impl TryIntoInputFile + FileName + Send + Sync], + files: Vec, context: &ExecutionContext, tx: Sender>, cache: &impl GritCache, @@ -441,32 +364,33 @@ impl Problem { event!(Level::INFO, "spawn execute_shared_body"); - files.par_iter().for_each_with(tx, |sender, f| { - self.build_and_execute_resolved_pattern( - sender, - std::array::from_ref(f), - context, - cache, - ); + files.into_par_iter().for_each_with(tx, |sender, f| { + let vec = vec![f]; + self.build_and_execute_resolved_pattern(sender, vec, 
context, cache); }); }) }) } } - fn execute( + fn execute<'a>( &self, binding: FilePattern, + files: Vec>, + file_names: Vec<&PathBuf>, owned_files: &FileOwners, context: &ExecutionContext, ) -> Result> { let mut user_logs = vec![].into(); + let lazy_files = files; + let context = MarzanoContext::new( &self.pattern_definitions, &self.predicate_definitions, &self.function_definitions, &self.foreign_function_definitions, + lazy_files, owned_files, &self.built_ins, &self.language, @@ -486,8 +410,8 @@ impl Problem { }) .collect(); - let file_refs: Vec<&FileOwner> = context.files.iter().collect(); - let mut state = State::new(bindings, file_refs); + let file_registry = FileRegistry::new_from_paths(file_names); + let mut state = State::new(bindings, file_registry); let the_new_files = state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[NEW_FILES_INDEX].as_mut(); @@ -516,24 +440,6 @@ impl Problem { } } -fn is_file_too_big(file: &RichFile) -> Option { - if file.path.len() > MAX_FILE_SIZE || file.content.len() > MAX_FILE_SIZE { - Some(AnalysisLog { - // TODO: standardize levels - level: 310, - message: format!("Skipped {}, it is too big.", file.path), - file: file.path.to_owned(), - engine_id: "marzano".to_owned(), - position: Position::first(), - syntax_tree: None, - range: None, - source: None, - }) - } else { - None - } -} - #[derive(Clone, Debug, PartialEq)] pub struct MarzanoQueryContext; diff --git a/crates/core/src/snapshots/marzano_core__test__filename.snap b/crates/core/src/snapshots/marzano_core__test__filename.snap index 879b6dbdc..c2dbf7354 100644 --- a/crates/core/src/snapshots/marzano_core__test__filename.snap +++ b/crates/core/src/snapshots/marzano_core__test__filename.snap @@ -11,6 +11,15 @@ expression: results ranges: [] - name: $program scopedName: 0_1_$program + ranges: [] + - name: $filename + scopedName: 0_2_$filename + ranges: [] + - name: $absolute_filename + scopedName: 0_3_$absolute_filename + ranges: [] + - name: $b + scopedName: 1_0_$b ranges: - start: line: 1 @@ -20,12 +29,6 @@ expression: results column: 1 startByte: 0 endByte: 10 - - name: $filename - scopedName: 0_2_$filename - ranges: [] - - name: $absolute_filename - scopedName: 0_3_$absolute_filename - ranges: [] sourceFile: test-file.tsx ranges: [] debug: "{\n \"SORT\": \"program\",\n \"RANGE\": {\n \"start\": {\n \"line\": 1,\n \"column\": 1\n },\n \"end\": {\n \"line\": 2,\n \"column\": 1\n }\n },\n \"statements\": [\n {\n \"SORT\": \"expression_statement\",\n \"RANGE\": {\n \"start\": {\n \"line\": 1,\n \"column\": 1\n },\n \"end\": {\n \"line\": 1,\n \"column\": 10\n }\n },\n \"expression\": {\n \"SORT\": \"identifier\",\n \"RANGE\": {\n \"start\": {\n \"line\": 1,\n \"column\": 1\n },\n \"end\": {\n \"line\": 1,\n \"column\": 9\n }\n },\n \"TEXT\": \"whatever\"\n },\n \"CHILDREN\": [\n {\n \"SORT\": \";\",\n \"RANGE\": {\n \"start\": {\n \"line\": 1,\n \"column\": 9\n },\n \"end\": {\n \"line\": 1,\n \"column\": 10\n }\n },\n \"TEXT\": \";\"\n }\n ]\n }\n ]\n}" @@ -33,8 +36,10 @@ expression: results messages: [] variables: [] sourceFile: the_new_name - content: "whatever;\n" - byteRanges: [] + content: "test-file.tsx;\n" + byteRanges: + - start: 0 + end: 13 ansiSummary: "" reason: ~ - __typename: DoneFile diff --git a/crates/core/src/test.rs b/crates/core/src/test.rs index eb7780321..f8d5ed04f 100644 --- a/crates/core/src/test.rs +++ b/crates/core/src/test.rs @@ -63,7 +63,7 @@ fn match_pattern_one_file( match_pattern_libs(pattern, &libs, file, src, default_language) } -fn create_test_context() -> 
Result { +pub(crate) fn create_test_context() -> Result { let context = ExecutionContext::default(); // Exchange client tokens for a test token @@ -82,7 +82,7 @@ fn create_test_context() -> Result { } lazy_static! { - static ref TEST_EXECUTION_CONTEXT: Result = create_test_context(); + pub(crate) static ref TEST_EXECUTION_CONTEXT: Result = create_test_context(); } #[allow(clippy::wildcard_enum_match_arm)] @@ -4807,7 +4807,7 @@ multifile { .unwrap(); let context = ExecutionContext::default(); let results = pattern.execute_files( - &[ + vec![ RichFile::new( "~/dev/rewriter/packages/sdk/src/stdlib/index.ts".to_string(), content1, @@ -4829,10 +4829,13 @@ fn test_filename() { |language js | |pattern foo() { - | $_ where $filename => `the_new_name` + | $b where { + | $b <: contains `whatever` => `$filename`, + | $filename => `the_new_name`, + | } |} | - |file(name = $filename, body = $program) where $program <: foo() + |file(body = foo()) |"# .trim_margin() .unwrap(); @@ -8321,7 +8324,7 @@ multifile { let context = ExecutionContext::default(); let pattern = src_to_problem(pattern.to_owned(), js_lang).unwrap(); let results = pattern.execute_files( - &[ + vec![ RichFile::new( "file1.tsx".to_string(), "foo(1)\nbar(1)\nbar(2)\nbaz(1)".to_string(), @@ -8351,7 +8354,7 @@ multifile { let context = ExecutionContext::default(); let pattern = src_to_problem(pattern.to_owned(), js_lang).unwrap(); let results = pattern.execute_files( - &[ + vec![ RichFile::new("file1.tsx".to_string(), "foo(1)".to_string()), RichFile::new("file2.tsx".to_string(), "bar(1)\nbar(3)".to_string()), ], @@ -14588,7 +14591,7 @@ fn ruby_nested_module() { | module ^foo_child | end |end` where { - | ^foo_child => `Child` + | ^foo_child => `Child` |} |"# .trim_margin() diff --git a/crates/core/src/test_files.rs b/crates/core/src/test_files.rs new file mode 100644 index 000000000..58619740d --- /dev/null +++ b/crates/core/src/test_files.rs @@ -0,0 +1,407 @@ +use marzano_language::target_language::TargetLanguage; +use marzano_util::{ + cache::NullCache, + rich_path::{FileName, RichFile, TryIntoInputFile}, + runtime::ExecutionContext, +}; +use serde::{Deserialize, Serialize}; + +use crate::api::{MatchResult, Rewrite}; + +use self::{pattern_compiler::src_to_problem_libs, problem::Problem}; +use anyhow::Result; + +use super::*; +use std::{borrow::Cow, collections::BTreeMap, sync::mpsc}; + +/// SyntheticFile is used for ensuring we don't read files until their file names match +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] +struct SyntheticFile { + pub path: String, + pub content: String, + pub can_read: bool, +} + +impl SyntheticFile { + pub fn new(path: String, content: String, can_read: bool) -> Self { + Self { + path, + content, + can_read, + } + } +} + +impl TryIntoInputFile for SyntheticFile { + fn try_into_cow(&self) -> Result> { + if !self.can_read { + println!("Tried to read file that should not be read: {}", self.path); + } + + Ok(Cow::Owned(RichFile::new( + self.path.clone(), + self.content.clone(), + ))) + } +} + +impl FileName for SyntheticFile { + fn name(&self) -> String { + self.path.to_owned() + } +} + +fn run_on_test_files(problem: &Problem, test_files: &[SyntheticFile]) -> Vec { + let mut results = vec![]; + let context = ExecutionContext::default(); + let (tx, rx) = mpsc::channel::>(); + problem.execute_shared(test_files.to_vec(), &context, tx, &NullCache::new()); + for r in rx.iter() { + results.extend(r) + } + results +} + +#[test] +fn test_lazy_file_parsing() { + let pattern_src = r#" + 
file(name=includes "target.js", body=contains bubble `$x` where { + $x <: contains `console.log($_)` + }) + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // Basic match works + let test_files = vec![SyntheticFile::new( + "target.js".to_owned(), + matching_src.to_owned(), + true, + )]; + let results = run_on_test_files(&pattern, &test_files); + assert!(results.iter().any(|r| r.is_match())); + + // Non-match match works + let test_files = vec![SyntheticFile::new( + "worng.js".to_owned(), + matching_src.to_owned(), + true, + )]; + let results = run_on_test_files(&pattern, &test_files); + assert!(!results.iter().any(|r| r.is_match())); + + // Unreadable file should not be read + let test_files = vec![SyntheticFile::new( + "do_not_read.js".to_owned(), + String::new(), + false, + )]; + let results = run_on_test_files(&pattern, &test_files); + assert!(!results.iter().any(|r| r.is_match())); + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("other.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("do_not_read.js".to_owned(), String::new(), false), + ]; + let results = run_on_test_files(&pattern, &test_files); + // Confirm we have 4 DoneFiles and 1 match + assert_eq!(results.len(), 5); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_lazy_filename_variable() { + let pattern_src = r#" + file(name=includes "target.js", body=contains bubble `$x` where { + $x <: contains `console.log($_)`, + $filename <: includes "target.js", + }) + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("other.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("do_not_read.js".to_owned(), String::new(), false), + ]; + let results = run_on_test_files(&pattern, &test_files); + // Confirm we have 4 DoneFiles and 1 match + assert_eq!(results.len(), 5); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_absolute_path_resolution() { + let pattern_src = r#" + file(body=contains bubble `console.log($msg)` where { + $resolved = resolve($absolute_filename), + $msg => `Hello, from $resolved` + }) + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![SyntheticFile::new( + "file/dir/target.js".to_owned(), + matching_src.to_owned(), + true, + )]; + let results = run_on_test_files(&pattern, &test_files); + assert!(!results.iter().any(|r| r.is_error())); + let mut has_rewrite = false; + for r in results.iter() { + if let MatchResult::Rewrite(Rewrite { rewritten, .. 
}) = r { + let content = &rewritten.content; + assert!(content.contains("core/file/dir/target.js")); + has_rewrite = true; + } + } + assert!(has_rewrite); +} + +#[test] +fn test_lazy_program_variable() { + let pattern_src = r#" + file(name=includes "target.js", body=contains bubble `$x` where { + $x <: contains `console.log($_)`, + $filename <: includes "target.js", + $program <: includes `console` + }) + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("other.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("do_not_read.js".to_owned(), String::new(), false), + ]; + let results = run_on_test_files(&pattern, &test_files); + // Confirm we have 4 DoneFiles and 1 match + assert_eq!(results.len(), 5); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_pattern_contains() { + let pattern_src = r#" + pattern main_thing() { + `console.log` where { $filename <: includes "target.js" } + } + contains main_thing() + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + ]; + let results = run_on_test_files(&pattern, &test_files); + assert_eq!(results.len(), 3); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_sequential_contains() { + let pattern_src = r#" + pattern main_thing() { + `console.log` where { $filename <: includes "target.js" } + } + sequential { + contains main_thing() + } + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + ]; + let results = run_on_test_files(&pattern, &test_files); + assert_eq!(results.len(), 3); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_sequential_contains_with_program() { + let pattern_src = r#" + pattern main_thing() { + `console.log` as $lg where { + $filename <: includes "target.js", + $program <: contains `log` + } + } + sequential { + contains main_thing() + } + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + ]; + let 
results = run_on_test_files(&pattern, &test_files); + assert_eq!(results.len(), 3); + assert!(results.iter().any(|r| r.is_match())); +} + +#[test] +fn test_multifile_mania() { + let pattern_src = r#" + pattern main_thing() { + `console.log` where { $filename <: includes "target.js" } + } + multifile { + contains main_thing(), + contains `log` where { $filename <: includes "target.js" } + } + "#; + let libs = BTreeMap::new(); + + let matching_src = r#" + console.log("Hello, world!"); + "#; + + let pattern = src_to_problem_libs( + pattern_src.to_string(), + &libs, + TargetLanguage::default(), + None, + None, + None, + None, + ) + .unwrap() + .problem; + + // All together now + let test_files = vec![ + SyntheticFile::new("wrong.js".to_owned(), matching_src.to_owned(), true), + SyntheticFile::new("target.js".to_owned(), matching_src.to_owned(), true), + ]; + let results = run_on_test_files(&pattern, &test_files); + assert!(results.iter().any(|r| r.is_match())); + // Make sure no errors + assert!(!results.iter().any(|r| r.is_error())); +} diff --git a/crates/grit-pattern-matcher/src/context.rs b/crates/grit-pattern-matcher/src/context.rs index 9197b18be..5e40b0062 100644 --- a/crates/grit-pattern-matcher/src/context.rs +++ b/crates/grit-pattern-matcher/src/context.rs @@ -41,6 +41,17 @@ pub trait ExecContext<'a, Q: QueryContext> { logs: &mut AnalysisLogs, ) -> Result>; + /// Call this when "entering" a file to lazily load it. + /// This MUST be implemented correctly, or the query engine will not work. + /// + // TODO: ideally this should be async, but that requires engine-wide async support. + fn load_file( + &self, + file: &Q::File<'a>, + state: &mut State<'a, Q>, + logs: &mut AnalysisLogs, + ) -> Result; + // FIXME: Don't depend on Grit's file handling in Context. fn files(&self) -> &FileOwners; diff --git a/crates/grit-pattern-matcher/src/pattern/contains.rs b/crates/grit-pattern-matcher/src/pattern/contains.rs index b40e5e5bc..0d0d62aef 100644 --- a/crates/grit-pattern-matcher/src/pattern/contains.rs +++ b/crates/grit-pattern-matcher/src/pattern/contains.rs @@ -3,7 +3,11 @@ use super::{ resolved_pattern::{LazyBuiltIn, ResolvedPattern, ResolvedSnippet}, State, }; -use crate::{binding::Binding, context::QueryContext, pattern::resolved_pattern::File}; +use crate::{ + binding::Binding, constants::PROGRAM_INDEX, context::QueryContext, + pattern::resolved_pattern::File, +}; +use crate::{constants::GLOBAL_VARS_SCOPE_INDEX, context::ExecContext}; use anyhow::Result; use core::fmt::Debug; use grit_util::AnalysisLogs; @@ -144,9 +148,20 @@ impl Matcher for Contains { *init_state = cur_state; Ok(true) } else if let Some(file) = resolved_pattern.get_file() { + // Load the file in, if it wasn't already + if !context.load_file(file, init_state, logs)? { + return Ok(false); + } + + init_state.bindings[GLOBAL_VARS_SCOPE_INDEX] + .back_mut() + .unwrap()[PROGRAM_INDEX] + .value = Some(file.binding(&init_state.files)); + let mut cur_state = init_state.clone(); let mut did_match = false; let prev_state = cur_state.clone(); + if self .contains .execute(resolved_pattern, &mut cur_state, context, logs)? 
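The new `ExecContext::load_file` hook distinguishes a hard failure (`Err`) from a deliberately skipped file (`Ok(false)`, e.g. one rejected by the size limit in `limits.rs`), and matchers such as `Contains` turn the latter into a plain non-match before ever touching the body. A minimal, self-contained sketch of that contract follows; `InMemoryLoader`, `FakeFile`, and the local `MAX_FILE_SIZE` constant are illustrative stand-ins, not the real marzano types.

```rust
use std::collections::HashMap;

/// Stand-in for the engine's per-file size limit (the real constant lives elsewhere in core).
const MAX_FILE_SIZE: usize = 1_000_000;

/// Stand-in for a lazily loadable file: only the path is known up front.
struct FakeFile {
    path: String,
}

/// Stand-in for an `ExecContext` implementor that owns the raw file contents.
struct InMemoryLoader {
    contents: HashMap<String, String>,
    logs: Vec<String>,
}

impl InMemoryLoader {
    /// Mirrors the `load_file` contract: `Err` for real failures,
    /// `Ok(false)` when the file is intentionally skipped (too big),
    /// `Ok(true)` once the body is available for matching.
    fn load_file(&mut self, file: &FakeFile) -> Result<bool, String> {
        let content = self
            .contents
            .get(&file.path)
            .ok_or_else(|| format!("missing file: {}", file.path))?;
        if file.path.len() > MAX_FILE_SIZE || content.len() > MAX_FILE_SIZE {
            // Same spirit as `is_file_too_big`: record a log and skip, don't error.
            self.logs
                .push(format!("Skipped {}, it is too big.", file.path));
            return Ok(false);
        }
        Ok(true)
    }
}

fn main() {
    let mut loader = InMemoryLoader {
        contents: HashMap::from([("target.js".to_string(), "console.log(1);".to_string())]),
        logs: vec![],
    };
    let file = FakeFile { path: "target.js".to_string() };
    // A matcher treats `Ok(false)` as "no match", never as an error.
    assert_eq!(loader.load_file(&file), Ok(true));
    assert!(loader.logs.is_empty());
}
```

Keeping the skip case in the `Ok` arm means one oversized file becomes a logged non-match instead of failing the whole query.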
@@ -155,6 +170,7 @@ impl Matcher for Contains { } else { cur_state = prev_state; } + let prev_state = cur_state.clone(); if self .contains @@ -164,6 +180,7 @@ impl Matcher for Contains { } else { cur_state = prev_state; } + let prev_state = cur_state.clone(); if self.execute( &file.binding(&cur_state.files), diff --git a/crates/grit-pattern-matcher/src/pattern/file_pattern.rs b/crates/grit-pattern-matcher/src/pattern/file_pattern.rs index c492bb554..42fac4936 100644 --- a/crates/grit-pattern-matcher/src/pattern/file_pattern.rs +++ b/crates/grit-pattern-matcher/src/pattern/file_pattern.rs @@ -3,6 +3,10 @@ use super::{ resolved_pattern::ResolvedPattern, state::State, }; +use crate::{ + constants::{ABSOLUTE_PATH_INDEX, FILENAME_INDEX, GLOBAL_VARS_SCOPE_INDEX, PROGRAM_INDEX}, + context::ExecContext, +}; use crate::{context::QueryContext, pattern::resolved_pattern::File}; use anyhow::Result; use grit_util::AnalysisLogs; @@ -31,12 +35,30 @@ impl Matcher for FilePattern { return Ok(false); }; - if !self - .name - .execute(&file.name(&state.files), state, context, logs)? - { + let name = file.name(&state.files); + + if !self.name.execute(&name, state, context, logs)? { + return Ok(false); + } + + // If the file isn't loaded yet, we must load it now + if !context.load_file(file, state, logs)? { + // The file wasn't loaded, so we can't match the body return Ok(false); } + + // Re-execute the name pattern to bind the name variable + self.name + .execute(&file.name(&state.files), state, context, logs)?; + + // Fill in the variables now - this is a bit of a hack + state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[PROGRAM_INDEX].value = + Some(file.binding(&state.files)); + state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[FILENAME_INDEX].value = + Some(file.name(&state.files)); + state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[ABSOLUTE_PATH_INDEX].value = + Some(file.absolute_path(&state.files, context.language())?); + if !self .body .execute(&file.binding(&state.files), state, context, logs)? 
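With `FilePattern` the expensive part is now deferred: the name pattern runs against the path alone, the body is only materialized via `load_file` once the name matches, and only then are the `$program`, `$filename`, and `$absolute_filename` globals filled in. A rough, self-contained model of that ordering, using hypothetical `LazySource` / `Globals` types rather than the real pattern machinery:

```rust
/// Hypothetical stand-in for a file whose body may not be loaded yet.
struct LazySource {
    name: String,
    body: Option<String>, // `None` until "loaded"
}

/// Hypothetical globals mirroring the $filename / $program bindings.
#[derive(Default, Debug)]
struct Globals {
    filename: Option<String>,
    program: Option<String>,
}

/// Sketch of the FilePattern ordering: name predicate, then load, then bind.
fn match_file(
    file: &mut LazySource,
    name_matches: impl Fn(&str) -> bool,
    body_matches: impl Fn(&str) -> bool,
    globals: &mut Globals,
) -> bool {
    // 1. Cheap check on the name alone; a non-matching name never loads the body.
    if !name_matches(file.name.as_str()) {
        return false;
    }
    // 2. Load the body lazily (faked here); in the engine a skipped load means no match.
    if file.body.is_none() {
        file.body = Some(format!("/* contents of {} */", file.name));
    }
    let body = file.body.as_ref().unwrap();
    // 3. Only once the body exists are the global variables bound.
    globals.filename = Some(file.name.clone());
    globals.program = Some(body.clone());
    body_matches(body.as_str())
}

fn main() {
    let mut globals = Globals::default();
    let mut file = LazySource { name: "target.js".into(), body: None };
    let matched = match_file(
        &mut file,
        |name| name.ends_with("target.js"),
        |body| body.contains("target.js"),
        &mut globals,
    );
    assert!(matched);
    assert_eq!(globals.filename.as_deref(), Some("target.js"));
    assert!(globals.program.is_some());
}
```

The same guard appears in `Contains`, so any pattern that reaches into a file body funnels through one lazy-loading path.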
diff --git a/crates/grit-pattern-matcher/src/pattern/patterns.rs b/crates/grit-pattern-matcher/src/pattern/patterns.rs index a33581682..fa3fcf295 100644 --- a/crates/grit-pattern-matcher/src/pattern/patterns.rs +++ b/crates/grit-pattern-matcher/src/pattern/patterns.rs @@ -48,7 +48,7 @@ use super::{ State, }; use crate::{ - constants::{ABSOLUTE_PATH_INDEX, FILENAME_INDEX, GLOBAL_VARS_SCOPE_INDEX, PROGRAM_INDEX}, + constants::{FILENAME_INDEX, GLOBAL_VARS_SCOPE_INDEX}, context::{ExecContext, QueryContext}, pattern::resolved_pattern::File, }; @@ -233,10 +233,6 @@ impl Matcher for Pattern { if let Some(file) = binding.get_file() { state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[FILENAME_INDEX].value = Some(file.name(&state.files)); - state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[ABSOLUTE_PATH_INDEX] - .value = Some(file.absolute_path(&state.files, context.language())?); - state.bindings[GLOBAL_VARS_SCOPE_INDEX].back_mut().unwrap()[PROGRAM_INDEX].value = - Some(file.binding(&state.files)); } match self { diff --git a/crates/grit-pattern-matcher/src/pattern/state.rs b/crates/grit-pattern-matcher/src/pattern/state.rs index 6572bdd14..8357e1209 100644 --- a/crates/grit-pattern-matcher/src/pattern/state.rs +++ b/crates/grit-pattern-matcher/src/pattern/state.rs @@ -12,8 +12,8 @@ use anyhow::{anyhow, bail, Result}; use grit_util::{AnalysisLogs, CodeRange, Range, VariableMatch}; use im::{vector, Vector}; use rand::SeedableRng; -use std::collections::HashMap; use std::ops::Range as StdRange; +use std::{collections::HashMap, path::PathBuf}; #[derive(Debug, Clone)] pub struct EffectRange<'a, Q: QueryContext> { @@ -28,38 +28,101 @@ impl Interval for EffectRange<'_, Q> { } #[derive(Clone, Debug)] -pub struct FileRegistry<'a, Q: QueryContext>(Vector>>); +pub struct FileRegistry<'a, Q: QueryContext> { + /// The number of versions for each file + version_count: Vec, + /// Original file paths, for lazy loading + file_paths: Vec<&'a PathBuf>, + /// The actual FileOwner, which has the full file available + owners: Vector>>, +} impl<'a, Q: QueryContext> FileRegistry<'a, Q> { - pub fn get_file(&self, pointer: FilePtr) -> &'a FileOwner { - self.0[pointer.file as usize][pointer.version as usize] + pub fn get_file_owner(&self, pointer: FilePtr) -> &'a FileOwner { + self.owners[pointer.file as usize][pointer.version as usize] + } + + pub fn get_file_name(&self, pointer: FilePtr) -> &'a PathBuf { + let file_index = pointer.file as usize; + let version_index = pointer.version as usize; + if let Some(owners) = self.owners.get(file_index) { + if let Some(owner) = owners.get(version_index) { + return &owner.name; + } + } + self.file_paths + .get(file_index) + .expect("File path should exist for given file index.") + } + + pub fn get_absolute_path(&self, pointer: FilePtr) -> Result<&'a PathBuf> { + let file_index = pointer.file as usize; + let version_index = pointer.version as usize; + if let Some(owners) = self.owners.get(file_index) { + if let Some(owner) = owners.get(version_index) { + return Ok(&owner.absolute_path); + } + } + Err(anyhow!( + "Absolute file path accessed before file was loaded." 
+ )) + } + + /// If only the paths are available, create a FileRegistry with empty owners + /// This is *unsafe* if you do not later insert the appropriate owners before get_file_owner is called + pub fn new_from_paths(file_paths: Vec<&'a PathBuf>) -> Self { + Self { + version_count: file_paths.iter().map(|_| 0).collect(), + owners: file_paths.iter().map(|_| vector![]).collect(), + file_paths, + } + } + + /// Confirms a file is already fully loaded + pub fn is_loaded(&self, pointer: &FilePtr) -> bool { + self.version_count + .get(pointer.file as usize) + .map_or(false, |&v| v > 0) } - pub fn new(files: Vector>>) -> Self { - Self(files) + /// Load a file in + pub fn load_file(&mut self, pointer: &FilePtr, file: &'a FileOwner) { + self.push_revision(pointer, file) } - // assumes at least one revision exists + /// Returns the latest revision of a given filepointer + /// If none exists, returns the file pointer itself pub fn latest_revision(&self, pointer: &FilePtr) -> FilePtr { - let latest = self.0[pointer.file as usize].len() - 1; - FilePtr { - file: pointer.file, - version: latest as u16, + match self.version_count.get(pointer.file as usize) { + Some(&version_count) => { + if version_count == 0 { + *pointer + } else { + FilePtr { + file: pointer.file, + version: version_count - 1, + } + } + } + None => *pointer, } } pub fn files(&self) -> &Vector>> { - &self.0 + &self.owners } pub fn push_revision(&mut self, pointer: &FilePtr, file: &'a FileOwner) { - self.0[pointer.file as usize].push_back(file) + self.version_count[pointer.file as usize] += 1; + self.owners[pointer.file as usize].push_back(file) } pub fn push_new_file(&mut self, file: &'a FileOwner) -> FilePtr { - self.0.push_back(vector![file]); + self.version_count.push(1); + self.file_paths.push(&file.name); + self.owners.push_back(vector![file]); FilePtr { - file: (self.0.len() - 1) as u16, + file: (self.owners.len() - 1) as u16, version: 0, } } @@ -151,12 +214,12 @@ impl FilePtr { } impl<'a, Q: QueryContext> State<'a, Q> { - pub fn new(bindings: VarRegistry<'a, Q>, files: Vec<&'a FileOwner>) -> Self { + pub fn new(bindings: VarRegistry<'a, Q>, registry: FileRegistry<'a, Q>) -> Self { Self { rng: rand::rngs::StdRng::seed_from_u64(32), bindings, effects: vector![], - files: FileRegistry::new(files.into_iter().map(|f| vector![f]).collect()), + files: registry, } } diff --git a/crates/gritmodule/src/testing.rs b/crates/gritmodule/src/testing.rs index e8da5c73b..99d81238c 100644 --- a/crates/gritmodule/src/testing.rs +++ b/crates/gritmodule/src/testing.rs @@ -183,7 +183,8 @@ pub fn test_pattern_sample( .iter() .map(|input| RichFile::new(input.path.to_owned(), input.content.to_owned())) .collect::>(); - let res = compiled.execute_files(&rich_files, &runtime); + let cloned_files = rich_files.clone(); + let res = compiled.execute_files(rich_files, &runtime); for result in res.into_iter() { if is_match(&result) { @@ -274,7 +275,7 @@ pub fn test_pattern_sample( if raw_actual_outputs.len() < raw_expected_outputs.len() && is_multifile_sample(&sample.input, &compiled.language) { - for file in rich_files.iter() { + for file in cloned_files.iter() { if raw_actual_outputs.iter().any(|f| f.path == file.path) { continue; } diff --git a/crates/language/src/ruby.rs b/crates/language/src/ruby.rs index d3bcd7606..ccc9f58de 100644 --- a/crates/language/src/ruby.rs +++ b/crates/language/src/ruby.rs @@ -1,8 +1,8 @@ use crate::language::{fields_for_nodes, Field, MarzanoLanguage, NodeTypes, SortId, TSLanguage}; use grit_util::Language; +use 
lazy_static::lazy_static; use marzano_util::node_with_source::NodeWithSource; use regex::Regex; -use lazy_static::lazy_static; use std::sync::OnceLock; static NODE_TYPES_STRING: &str = include_str!("../../../resources/node-types/ruby-node-types.json"); @@ -56,10 +56,10 @@ impl NodeTypes for Ruby { lazy_static! { static ref EXACT_VARIABLE_REGEX: Regex = Regex::new(r"^\^([A-Za-z_][A-Za-z0-9_]*)$") .expect("Failed to compile EXACT_VARIABLE_REGEX"); - static ref VARIABLE_REGEX: Regex = Regex::new(r"\^(\.\.\.|[A-Za-z_][A-Za-z0-9_]*)") - .expect("Failed to compile VARIABLE_REGEX"); - static ref BRACKET_VAR_REGEX: Regex = Regex::new(r"\^\[([A-Za-z_][A-Za-z0-9_]*)\]") - .expect("Failed to compile BRACKET_VAR_REGEX"); + static ref VARIABLE_REGEX: Regex = + Regex::new(r"\^(\.\.\.|[A-Za-z_][A-Za-z0-9_]*)").expect("Failed to compile VARIABLE_REGEX"); + static ref BRACKET_VAR_REGEX: Regex = + Regex::new(r"\^\[([A-Za-z_][A-Za-z0-9_]*)\]").expect("Failed to compile BRACKET_VAR_REGEX"); } impl Language for Ruby { diff --git a/crates/util/src/rich_path.rs b/crates/util/src/rich_path.rs index 60954bfb9..9fee1058a 100644 --- a/crates/util/src/rich_path.rs +++ b/crates/util/src/rich_path.rs @@ -51,6 +51,12 @@ impl FileName for RichFile { } } +impl FileName for &RichFile { + fn name(&self) -> String { + self.path.to_owned() + } +} + // there must be a better way right? impl FileName for &(RichFile, [u8; 32]) { fn name(&self) -> String { @@ -85,6 +91,12 @@ impl TryIntoInputFile for &(RichFile, [u8; 32]) { } } +impl TryIntoInputFile for &RichFile { + fn try_into_cow(&self) -> Result> { + Ok(Cow::Borrowed(self)) + } +} + impl TryIntoInputFile for RichFile { fn try_into_cow(&self) -> Result> { Ok(Cow::Borrowed(self)) @@ -106,3 +118,12 @@ impl TryIntoInputFile for &RichPath { Ok(Cow::Owned(RichFile::new(name, content))) } } + +/// Core Marzano file trait +pub trait LoadableFile: TryIntoInputFile + FileName {} +impl LoadableFile for T where T: TryIntoInputFile + FileName {} + +/// All the required traits for processing a file in the Marzano engine +pub trait MarzanoFileTrait: TryIntoInputFile + FileName + Send + Sync + Clone {} + +impl MarzanoFileTrait for T where T: TryIntoInputFile + FileName + Send + Sync + Clone {} diff --git a/crates/wasm-bindings/src/match_pattern.rs b/crates/wasm-bindings/src/match_pattern.rs index d58d4275e..6dd673066 100644 --- a/crates/wasm-bindings/src/match_pattern.rs +++ b/crates/wasm-bindings/src/match_pattern.rs @@ -1,9 +1,10 @@ use anyhow::Context; use grit_util::{Ast, Position}; +use marzano_core::pattern_compiler::PatternBuilder; use marzano_core::{ api::{AnalysisLog, InputFile, MatchResult, PatternInfo}, built_in_functions::BuiltIns, - pattern_compiler::{CompilationResult}, + pattern_compiler::CompilationResult, tree_sitter_serde::tree_sitter_node_to_json, }; use marzano_language::{ @@ -20,7 +21,6 @@ use std::{ }; use tree_sitter::{Language as TSLanguage, Parser as TSParser}; use wasm_bindgen::prelude::*; -use marzano_core::pattern_compiler::PatternBuilder; static GRIT_LANGUAGE: OnceLock = OnceLock::new(); static JAVASCRIPT_LANGUAGE: OnceLock = OnceLock::new(); @@ -63,7 +63,6 @@ extern "C" { ) -> Result; } - pub async fn parse_input_files_internal( pattern: String, paths: Vec, @@ -76,7 +75,8 @@ pub async fn parse_input_files_internal( let _ = web_tree_sitter_sg::TreeSitter::init().await; let mut pure_parser = setup_grit_parser().await?; let parser = &mut pure_parser; - let ParsedPattern { libs, tree, lang } = get_parsed_pattern(&pattern, lib_paths, lib_contents, parser).await?; + 
let ParsedPattern { libs, tree, lang } = + get_parsed_pattern(&pattern, lib_paths, lib_contents, parser).await?; let node = tree.root_node(); let parsed_pattern = tree_sitter_node_to_json(&node.node, &pattern, &lang).to_string(); @@ -145,16 +145,11 @@ pub async fn parse_input_files( // Library file contents, in the same order as `lib_paths`. lib_contents: Vec, ) -> Result { - let results = match parse_input_files_internal( - pattern, - paths, - contents, - lib_paths, - lib_contents, - ).await { - Ok(r) => r, - Err(e) => vec![error_to_log(e)], - }; + let results = + match parse_input_files_internal(pattern, paths, contents, lib_paths, lib_contents).await { + Ok(r) => r, + Err(e) => vec![error_to_log(e)], + }; Ok(serde_wasm_bindgen::to_value(&results)?) } @@ -226,13 +221,7 @@ async fn match_pattern_internal( let injected_builtins: Option = None; #[cfg(feature = "ai_builtins")] let injected_builtins = Some(ai_builtins::ai_builtins::get_ai_built_in_functions()); - let builder = PatternBuilder::start( - pattern, - &libs, - lang, - None, - parser, - injected_builtins)?; + let builder = PatternBuilder::start(pattern, &libs, lang, None, parser, injected_builtins)?; let CompilationResult { problem: pattern, .. } = builder.compile(None, None)?; @@ -241,7 +230,7 @@ async fn match_pattern_internal( .zip(contents) .map(|(p, c)| RichFile::new(p, c)) .collect(); - let results = pattern.execute_files(&files, &context); + let results = pattern.execute_files(files, &context); Ok(results) } @@ -286,7 +275,9 @@ pub async fn match_pattern( lib_contents, llm_api_base, llm_api_bearer_token, - ).await { + ) + .await + { Ok(r) => r, Err(e) => vec![error_to_log(e)], }; @@ -334,7 +325,9 @@ async fn get_cached_lang(lang: &PatternLanguage) -> anyhow::Result<&'static TSLa } else { let path = pattern_language_to_path(lang)?; let _language_already_set = lang_store.set(get_lang(&path).await?); - Ok(lang_store.get().ok_or_else(|| anyhow::anyhow!("Failed to get language"))?) + Ok(lang_store + .get() + .ok_or_else(|| anyhow::anyhow!("Failed to get language"))?) } } @@ -345,7 +338,9 @@ async fn setup_grit_parser() -> anyhow::Result { lang } else { let _language_already_set = GRIT_LANGUAGE.set(get_lang(&lang_path).await?); - GRIT_LANGUAGE.get().ok_or_else(|| anyhow::anyhow!("Failed to setup GRIT_LANGUAGE"))? + GRIT_LANGUAGE + .get() + .ok_or_else(|| anyhow::anyhow!("Failed to setup GRIT_LANGUAGE"))? }; parser.set_language(lang)?; Ok(MarzanoGritParser::from_initialized_ts_parser(parser)) @@ -366,8 +361,7 @@ async fn get_language_for_tree(tree: &Tree) -> anyhow::Result { // javascript also parses vue files to look for javascript so // we need to initialize the Vue struct with a wasm parser let vue_lang = get_cached_lang(&PatternLanguage::Vue).await?; - let _ = PatternLanguage::Vue - .to_target_with_ts_lang(vue_lang.clone()); + let _ = PatternLanguage::Vue.to_target_with_ts_lang(vue_lang.clone()); } let ts_lang = get_cached_lang(&lang).await?; Ok(lang.to_target_with_ts_lang(ts_lang.clone())?)
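Underneath all of this, the reworked `FileRegistry` keeps a per-file `version_count` alongside the original paths, so `is_loaded` is just a `version_count > 0` check and `latest_revision` can answer for never-loaded files by echoing the pointer back. A compact, self-contained model of that bookkeeping, with plain `String` bodies standing in for `FileOwner`s and `im::Vector`:

```rust
/// Index of a file plus the revision being pointed at, mirroring `FilePtr`.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Ptr {
    file: u16,
    version: u16,
}

/// Toy registry: paths are known up front, bodies arrive lazily as revisions.
struct Registry {
    version_count: Vec<u16>,
    paths: Vec<String>,
    revisions: Vec<Vec<String>>,
}

impl Registry {
    fn new_from_paths(paths: Vec<String>) -> Self {
        let n = paths.len();
        Self {
            version_count: vec![0; n],
            revisions: vec![Vec::new(); n],
            paths,
        }
    }

    fn is_loaded(&self, ptr: &Ptr) -> bool {
        self.version_count
            .get(ptr.file as usize)
            .map_or(false, |&v| v > 0)
    }

    /// Latest revision if one exists; otherwise the pointer is echoed back unchanged.
    fn latest_revision(&self, ptr: &Ptr) -> Ptr {
        match self.version_count.get(ptr.file as usize) {
            Some(&count) if count > 0 => Ptr {
                file: ptr.file,
                version: count - 1,
            },
            _ => *ptr,
        }
    }

    fn push_revision(&mut self, ptr: &Ptr, body: String) {
        self.version_count[ptr.file as usize] += 1;
        self.revisions[ptr.file as usize].push(body);
    }
}

fn main() {
    let mut reg = Registry::new_from_paths(vec!["target.js".to_string()]);
    println!("tracking {} path(s)", reg.paths.len());
    let ptr = Ptr { file: 0, version: 0 };
    assert!(!reg.is_loaded(&ptr));
    // Unloaded file: the caller just gets its own pointer back.
    assert_eq!(reg.latest_revision(&ptr), ptr);
    reg.push_revision(&ptr, "console.log(1);".to_string());
    assert!(reg.is_loaded(&ptr));
    assert_eq!(reg.latest_revision(&ptr).version, 0);
}
```

This mirrors the doc comment on `new_from_paths`: owners start empty, so asking for a file owner is only safe after the corresponding load has pushed a revision.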