Skip to content

Commit

Permalink
feat: implement lazy file loading (#306)
Browse files Browse the repository at this point in the history
  • Loading branch information
morgante authored May 6, 2024
1 parent 452abd1 commit b4cfadc
Show file tree
Hide file tree
Showing 24 changed files with 848 additions and 298 deletions.
2 changes: 1 addition & 1 deletion crates/cli/src/analytics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use anyhow::Result;
use clap::Args;
use lazy_static::lazy_static;
use marzano_gritmodule::fetcher::LocalRepo;
use marzano_gritmodule::{fetcher::ModuleRepo};
use marzano_gritmodule::fetcher::ModuleRepo;
use marzano_messenger::emit::ApplyDetails;
use serde::{Deserialize, Serialize};
use std::{env, fmt, time::Duration};
Expand Down
2 changes: 1 addition & 1 deletion crates/cli/src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ where
#[cfg(feature = "grit_tracing")]
task_span.set_parent(grouped_ctx);
task_span.in_scope(|| {
compiled.execute_paths_streaming(&found_paths, context, tx, cache_ref);
compiled.execute_paths_streaming(found_paths, context, tx, cache_ref);
loop {
if processed.load(Ordering::SeqCst) >= found_count.try_into().unwrap()
|| !should_continue.load(Ordering::SeqCst)
Expand Down
2 changes: 1 addition & 1 deletion crates/cli/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pub(crate) async fn run_check(
!cache.has_no_matches(hash, pattern.hash)
})
.collect();
let (result, no_match) = pattern.execute_paths(&un_cached_input_files, &context);
let (result, no_match) = pattern.execute_paths(un_cached_input_files, &context);
if !no_match.is_empty() {
for path in no_match.into_iter() {
let hash = path.hash.unwrap();
Expand Down
1 change: 0 additions & 1 deletion crates/cli/src/community.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use anyhow::Result;
use grit_util::{FileRange, Position, RangeWithoutByte};
use serde::Deserialize;


use std::path::PathBuf;

#[derive(Debug, Deserialize)]
Expand Down
2 changes: 2 additions & 0 deletions crates/cli_bin/tests/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,8 @@ fn test_absolute_path() -> Result<()> {
let file = dir.join("dir2/unique.js");
let content = std::fs::read_to_string(file)?;

println!("content: {:?}", content);

// Verify it contains dir2/unique.js
assert!(content.contains("dir2/unique.js"));

Expand Down
23 changes: 23 additions & 0 deletions crates/core/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ pub enum MatchResult {
AnalysisLog(AnalysisLog),
}

impl MatchResult {
    /// Method form of the free function `is_match`; simply forwards `self` to it.
    pub fn is_match(&self) -> bool {
        is_match(self)
    }

    /// Returns `true` only when this result is an `AnalysisLog` whose `level`
    /// is below 400. Every other variant yields `false`.
    pub fn is_error(&self) -> bool {
        match self {
            MatchResult::AnalysisLog(log) => log.level < 400,
            _ => false,
        }
    }
}

/// Make a path look the way provolone expects it to
/// Removes leading "./", or the root path if it's provided
fn normalize_path_in_project<'a>(path: &'a str, root_path: Option<&'a PathBuf>) -> &'a str {
Expand Down Expand Up @@ -591,6 +601,19 @@ impl AnalysisLog {
source: None,
}
}

/// Builds a log entry with an empty `file` name.
///
/// The entry carries `message` at level 280, the position returned by
/// `Position::first()`, the `"marzano"` engine id, and no range, syntax tree,
/// or source.
pub(crate) fn floating_error(message: String) -> Self {
    // No file is associated with this log, so the name is left empty.
    let file = String::new();
    let engine_id = String::from("marzano");
    Self {
        level: 280,
        message,
        position: Position::first(),
        file,
        engine_id,
        range: None,
        syntax_tree: None,
        source: None,
    }
}
}

impl From<GritAnalysisLog> for AnalysisLog {
Expand Down
1 change: 1 addition & 0 deletions crates/core/src/built_in_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ fn resolve_path_fn<'a>(
let args = MarzanoResolvedPattern::from_patterns(args, state, context, logs)?;

let current_file = get_absolute_file_name(state, context.language())?;

let target_path = match &args[0] {
Some(resolved_pattern) => resolved_pattern.text(&state.files, context.language())?,
None => return Err(anyhow!("No path argument provided for resolve function")),
Expand Down
3 changes: 3 additions & 0 deletions crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod equivalence;
mod foreign_function_definition;
pub mod fs;
mod inline_snippets;
mod limits;
pub mod marzano_binding;
pub mod marzano_code_snippet;
pub mod marzano_context;
Expand All @@ -35,3 +36,5 @@ use getrandom as _;

#[cfg(test)]
mod test;
#[cfg(test)]
mod test_files;
22 changes: 22 additions & 0 deletions crates/core/src/limits.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use grit_util::{AnalysisLog, Position};
use marzano_util::rich_path::RichFile;

use crate::constants::MAX_FILE_SIZE;

/// Checks `file` against `MAX_FILE_SIZE`.
///
/// Returns `None` when both the path length and the content length are within
/// the limit. Otherwise returns a level-310 `AnalysisLog` saying the file was
/// skipped, attributed to the file's path and positioned at `Position::first()`.
pub(crate) fn is_file_too_big(file: &RichFile) -> Option<AnalysisLog> {
    // NOTE(review): the path length is held to the same limit as the content
    // length — confirm that is intended.
    let within_limit =
        file.path.len() <= MAX_FILE_SIZE && file.content.len() <= MAX_FILE_SIZE;
    if within_limit {
        return None;
    }

    let skipped = AnalysisLog {
        // TODO: standardize levels
        level: Some(310),
        message: format!("Skipped {}, it is too big.", file.path),
        file: Some(file.path.to_owned().into()),
        engine_id: Some("marzano".to_owned()),
        position: Some(Position::first()),
        syntax_tree: None,
        range: None,
        source: None,
    };
    Some(skipped)
}
64 changes: 59 additions & 5 deletions crates/core/src/marzano_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::{
built_in_functions::BuiltIns,
clean::{get_replacement_ranges, replace_cleaned_ranges},
foreign_function_definition::ForeignFunctionDefinition,
marzano_resolved_pattern::MarzanoResolvedPattern,
limits::is_file_too_big,
marzano_resolved_pattern::{MarzanoFile, MarzanoResolvedPattern},
pattern_compiler::file_owner_compiler::FileOwnerCompiler,
problem::MarzanoQueryContext,
text_unparser::apply_effects,
Expand All @@ -24,14 +25,18 @@ use marzano_language::{
language::{MarzanoLanguage, Tree},
target_language::TargetLanguage,
};
use marzano_util::runtime::ExecutionContext;
use std::path::PathBuf;
use marzano_util::{
rich_path::{LoadableFile, RichFile},
runtime::ExecutionContext,
};
use std::{borrow::Cow, path::PathBuf};

pub struct MarzanoContext<'a> {
pub pattern_definitions: &'a Vec<PatternDefinition<MarzanoQueryContext>>,
pub predicate_definitions: &'a Vec<PredicateDefinition<MarzanoQueryContext>>,
pub function_definitions: &'a Vec<GritFunctionDefinition<MarzanoQueryContext>>,
pub foreign_function_definitions: &'a Vec<ForeignFunctionDefinition>,
lazy_files: Vec<Box<dyn LoadableFile + 'a>>,
pub files: &'a FileOwners<Tree>,
pub built_ins: &'a BuiltIns,
pub language: &'a TargetLanguage,
Expand All @@ -46,6 +51,7 @@ impl<'a> MarzanoContext<'a> {
predicate_definitions: &'a Vec<PredicateDefinition<MarzanoQueryContext>>,
function_definitions: &'a Vec<GritFunctionDefinition<MarzanoQueryContext>>,
foreign_function_definitions: &'a Vec<ForeignFunctionDefinition>,
lazy_files: Vec<Box<dyn LoadableFile + 'a>>,
files: &'a FileOwners<Tree>,
built_ins: &'a BuiltIns,
language: &'a TargetLanguage,
Expand All @@ -57,6 +63,7 @@ impl<'a> MarzanoContext<'a> {
predicate_definitions,
function_definitions,
foreign_function_definitions,
lazy_files,
files,
built_ins,
language,
Expand Down Expand Up @@ -112,6 +119,48 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
self.built_ins.call(call, context, state, logs)
}

/// Ensures the file referenced by `file` has been loaded into `state`.
///
/// * `MarzanoFile::Resolved` files are assumed to be loaded already.
/// * `MarzanoFile::Ptr` files that `state.files` reports as loaded are left alone.
/// * Otherwise the entry of `lazy_files` at index `ptr.file` is materialised,
///   checked with `is_file_too_big`, compiled through
///   `FileOwnerCompiler::from_matches`, pushed onto `self.files`, and
///   registered with `state.files`.
///
/// Returns `Ok(false)` only when the file is rejected for being too big (the
/// corresponding log is appended to `logs`); returns `Ok(true)` otherwise.
/// Errors from `try_into_cow` and `from_matches` are propagated.
fn load_file(
    &self,
    file: &MarzanoFile<'a>,
    state: &mut State<'a, MarzanoQueryContext>,
    logs: &mut AnalysisLogs,
) -> anyhow::Result<bool> {
    let ptr = match file {
        // Assume the file is already loaded
        MarzanoFile::Resolved(_) => return Ok(true),
        MarzanoFile::Ptr(ptr) => ptr,
    };

    if state.files.is_loaded(ptr) {
        return Ok(true);
    }

    let lazy: Cow<RichFile> = self.lazy_files[ptr.file as usize].try_into_cow()?;

    // Oversized files are reported through `logs` and never compiled.
    if let Some(too_big) = is_file_too_big(&lazy) {
        logs.push(too_big);
        return Ok(false);
    }

    let rich_file = lazy.into_owned();
    let compiled = FileOwnerCompiler::from_matches(
        rich_file.path,
        rich_file.content,
        None,
        false,
        self.language,
        logs,
    )?;

    // NOTE(review): when `from_matches` yields `None` nothing is registered,
    // yet the call still reports `Ok(true)` — confirm callers expect that.
    if let Some(owner) = compiled {
        self.files.push(owner);
        state.files.load_file(ptr, self.files.last().unwrap());
    }

    Ok(true)
}

// FIXME: Don't depend on Grit's file handling in context.
fn files(&self) -> &FileOwners<Tree> {
self.files
Expand All @@ -130,7 +179,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
) -> Result<bool> {
let mut parser = self.language().get_parser();

let files = if let Some(files) = binding.get_file_pointers() {
let mut files = if let Some(files) = binding.get_file_pointers() {
files
.iter()
.map(|f| state.files.latest_revision(f))
Expand All @@ -142,6 +191,11 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
let binding = if files.len() == 1 {
ResolvedPattern::from_file_pointer(*files.last().unwrap())
} else {
// Load all files into memory and collect successful file pointers
files.retain(|file_ptr| {
self.load_file(&MarzanoFile::Ptr(*file_ptr), state, logs)
.unwrap_or(false)
});
ResolvedPattern::from_files(ResolvedPattern::from_list_parts(
files.iter().map(|f| ResolvedPattern::from_file_pointer(*f)),
))
Expand All @@ -160,7 +214,7 @@ impl<'a> ExecContext<'a, MarzanoQueryContext> for MarzanoContext<'a> {
suppressed,
};
for file_ptr in files {
let file = state.files.get_file(file_ptr);
let file = state.files.get_file_owner(file_ptr);
let mut match_log = file.matches.borrow_mut();

let filename_path = &file.name;
Expand Down
8 changes: 4 additions & 4 deletions crates/core/src/marzano_resolved_pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
fn name(&self, files: &FileRegistry<'a, MarzanoQueryContext>) -> MarzanoResolvedPattern<'a> {
match self {
Self::Resolved(resolved) => resolved.name.clone(),
Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(&files.get_file(*ptr).name),
Self::Ptr(ptr) => MarzanoResolvedPattern::from_path_binding(files.get_file_name(*ptr)),
}
}

Expand All @@ -866,7 +866,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
)))
}
Self::Ptr(ptr) => Ok(ResolvedPattern::from_path_binding(
&files.get_file(*ptr).absolute_path,
files.get_absolute_path(*ptr)?,
)),
}
}
Expand All @@ -875,7 +875,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
match self {
Self::Resolved(resolved) => resolved.body.clone(),
Self::Ptr(ptr) => {
let file = &files.get_file(*ptr);
let file = &files.get_file_owner(*ptr);
let root = file.tree.root_node();
let range = root.byte_range();
ResolvedPattern::from_range_binding(range, &file.tree.source)
Expand All @@ -887,7 +887,7 @@ impl<'a> File<'a, MarzanoQueryContext> for MarzanoFile<'a> {
match self {
Self::Resolved(resolved) => resolved.body.clone(),
Self::Ptr(ptr) => {
let file = &files.get_file(*ptr);
let file = &files.get_file_owner(*ptr);
ResolvedPattern::from_node_binding(file.tree.root_node())
}
}
Expand Down
Loading

0 comments on commit b4cfadc

Please sign in to comment.