Skip to content

Commit

Permalink
Explicit imports: import 'Raw "sample.html" (#2036)
Browse files Browse the repository at this point in the history
* Explicit imports

* explicit import: more changes.

* Rename typ to format
* Bring back the fallback
* Implement pretty-printer part
* Fix compilation of NLP

* Document imports

* Apply suggestions from code review

Co-authored-by: Yann Hamdaoui <[email protected]>

* minor fix

* explicit imports: add tests

* explicit imports: support importing the same file with different formats

* fmt

* Reinforce explicit imports test against some whitespace changes.

---------

Co-authored-by: Yann Hamdaoui <[email protected]>
  • Loading branch information
vi and yannham authored Sep 18, 2024
1 parent dceb960 commit c50647d
Show file tree
Hide file tree
Showing 27 changed files with 254 additions and 65 deletions.
91 changes: 69 additions & 22 deletions core/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use std::time::SystemTime;
use void::Void;

/// Supported input formats.
#[derive(Default, Clone, Copy, Eq, Debug, PartialEq)]
#[derive(Default, Clone, Copy, Eq, Debug, PartialEq, Hash)]
pub enum InputFormat {
#[default]
Nickel,
Expand All @@ -58,10 +58,36 @@ impl InputFormat {
_ => None,
}
}
/// Renturns an [InputFormat] based on the extension of a source path.

pub fn from_tag(tag: &str) -> Option<InputFormat> {
Some(match tag {
"Json" => InputFormat::Json,
"Nickel" => InputFormat::Nickel,
"Raw" => InputFormat::Raw,
"Yaml" => InputFormat::Yaml,
"Toml" => InputFormat::Toml,
#[cfg(feature = "nix-experimental")]
"Nix" => InputFormat::Nix,
_ => return None,
})
}

pub fn to_tag(&self) -> &'static str {
match self {
InputFormat::Nickel => "Nickel",
InputFormat::Json => "Json",
InputFormat::Yaml => "Yaml",
InputFormat::Toml => "Toml",
InputFormat::Raw => "Raw",
#[cfg(feature = "nix-experimental")]
InputFormat::Nix => "Nix",
}
}

/// Extracts format embedded in SourcePath
pub fn from_source_path(source_path: &SourcePath) -> Option<InputFormat> {
if let SourcePath::Path(p) = source_path {
Self::from_path(p)
if let SourcePath::Path(_p, fmt) = source_path {
Some(*fmt)
} else {
None
}
Expand Down Expand Up @@ -269,7 +295,7 @@ pub enum SourcePath {
///
/// This is the only `SourcePath` variant that can be resolved as the target
/// of an import statement.
Path(PathBuf),
Path(PathBuf, InputFormat),
/// A subrange of a file at the given path.
///
/// This is used by nls to analyze small parts of files that don't fully parse. The
Expand All @@ -290,7 +316,7 @@ impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {

fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
match value {
SourcePath::Path(p) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
SourcePath::Path(p, _) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
_ => Err(()),
}
}
Expand All @@ -302,7 +328,7 @@ impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
impl From<SourcePath> for OsString {
fn from(source_path: SourcePath) -> Self {
match source_path {
SourcePath::Path(p) | SourcePath::Snippet(p) => p.into(),
SourcePath::Path(p, _) | SourcePath::Snippet(p) => p.into(),
SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
SourcePath::Query => "<query>".into(),
Expand Down Expand Up @@ -364,13 +390,18 @@ impl Cache {

/// Same as [Self::add_file], but assume that the path is already normalized, and take the
/// timestamp as a parameter.
fn add_file_(&mut self, path: PathBuf, timestamp: SystemTime) -> io::Result<FileId> {
fn add_file_(
&mut self,
path: PathBuf,
format: InputFormat,
timestamp: SystemTime,
) -> io::Result<FileId> {
let contents = std::fs::read_to_string(&path)?;
let file_id = self.files.add(&path, contents);
self.file_paths
.insert(file_id, SourcePath::Path(path.clone()));
.insert(file_id, SourcePath::Path(path.clone(), format));
self.file_ids.insert(
SourcePath::Path(path),
SourcePath::Path(path, format),
NameIdEntry {
id: file_id,
source: SourceKind::Filesystem(timestamp),
Expand All @@ -383,24 +414,32 @@ impl Cache {
///
/// Uses the normalized path and the *modified at* timestamp as the name-id table entry.
/// Overrides any existing entry with the same name.
pub fn add_file(&mut self, path: impl Into<OsString>) -> io::Result<FileId> {
pub fn add_file(
&mut self,
path: impl Into<OsString>,
format: InputFormat,
) -> io::Result<FileId> {
let path = path.into();
let timestamp = timestamp(&path)?;
let normalized = normalize_path(&path)?;
self.add_file_(normalized, timestamp)
self.add_file_(normalized, format, timestamp)
}

/// Try to retrieve the id of a file from the cache.
///
/// If it was not in cache, try to read it from the filesystem and add it as a new entry.
pub fn get_or_add_file(&mut self, path: impl Into<OsString>) -> io::Result<CacheOp<FileId>> {
pub fn get_or_add_file(
&mut self,
path: impl Into<OsString>,
format: InputFormat,
) -> io::Result<CacheOp<FileId>> {
let path = path.into();
let normalized = normalize_path(&path)?;
match self.id_or_new_timestamp_of(path.as_ref())? {
match self.id_or_new_timestamp_of(path.as_ref(), format)? {
SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
SourceState::Stale(timestamp) => {
self.add_file_(normalized, timestamp).map(CacheOp::Done)
}
SourceState::Stale(timestamp) => self
.add_file_(normalized, format, timestamp)
.map(CacheOp::Done),
}
}

Expand Down Expand Up @@ -954,7 +993,7 @@ impl Cache {
/// [normalize_path]).
pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
match name {
SourcePath::Path(p) => match self.id_or_new_timestamp_of(p).ok()? {
SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? {
SourceState::UpToDate(id) => Some(id),
SourceState::Stale(_) => None,
},
Expand All @@ -970,8 +1009,11 @@ impl Cache {
///
/// The main point of this awkward signature is to minimize I/O operations: if we accessed
/// the timestamp, keep it around.
fn id_or_new_timestamp_of(&self, name: &Path) -> io::Result<SourceState> {
match self.file_ids.get(&SourcePath::Path(name.to_owned())) {
fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result<SourceState> {
match self
.file_ids
.get(&SourcePath::Path(name.to_owned(), format))
{
None => Ok(SourceState::Stale(timestamp(name)?)),
Some(NameIdEntry {
id,
Expand Down Expand Up @@ -1319,6 +1361,7 @@ pub trait ImportResolver {
fn resolve(
&mut self,
path: &OsStr,
format: InputFormat,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError>;
Expand All @@ -1333,6 +1376,7 @@ impl ImportResolver for Cache {
fn resolve(
&mut self,
path: &OsStr,
format: InputFormat,
parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
Expand All @@ -1353,7 +1397,9 @@ impl ImportResolver for Cache {
.find_map(|parent| {
let mut path_buf = parent.clone();
path_buf.push(path);
self.get_or_add_file(&path_buf).ok().map(|x| (x, path_buf))
self.get_or_add_file(&path_buf, format)
.ok()
.map(|x| (x, path_buf))
})
.ok_or_else(|| {
let parents = possible_parents
Expand All @@ -1367,7 +1413,6 @@ impl ImportResolver for Cache {
)
})?;

let format = InputFormat::from_path(&path_buf).unwrap_or_default();
let (result, file_id) = match id_op {
CacheOp::Cached(id) => (ResolvedTerm::FromCache, id),
CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id),
Expand Down Expand Up @@ -1467,6 +1512,7 @@ pub mod resolvers {
fn resolve(
&mut self,
_path: &OsStr,
_format: InputFormat,
_parent: Option<FileId>,
_pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
Expand Down Expand Up @@ -1508,6 +1554,7 @@ pub mod resolvers {
fn resolve(
&mut self,
path: &OsStr,
_format: InputFormat,
_parent: Option<FileId>,
pos: &TermPos,
) -> Result<(ResolvedTerm, FileId), ImportError> {
Expand Down
12 changes: 12 additions & 0 deletions core/src/error/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ pub enum ParseError {
/// time, there are a set of expressions that can be excluded syntactically. Currently, it's
/// mostly constants.
InvalidContract(RawSpan),
/// Unrecognized explicit import format tag
InvalidImportFormat { span: RawSpan },
}

/// An error occurring during the resolution of an import.
Expand Down Expand Up @@ -815,6 +817,9 @@ impl ParseError {
}
}
InternalParseError::InvalidContract(span) => ParseError::InvalidContract(span),
InternalParseError::InvalidImportFormat { span } => {
ParseError::InvalidImportFormat { span }
}
},
}
}
Expand Down Expand Up @@ -2113,6 +2118,13 @@ impl IntoDiagnostics<FileId> for ParseError {
.to_owned(),
"Only functions and records might be valid contracts".to_owned(),
]),
ParseError::InvalidImportFormat{span} => Diagnostic::error()
.with_message("unknown import format tag")
.with_labels(vec![primary(&span)])
.with_notes(vec![
"Examples of valid format tags: 'Nickel 'Json 'Yaml 'Toml 'Raw"
.to_owned()
]),
};

vec![diagnostic]
Expand Down
4 changes: 2 additions & 2 deletions core/src/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
));
}
}
Term::Import(path) => {
Term::Import { path, .. } => {
return Err(EvalError::InternalError(
format!("Unresolved import ({})", path.to_string_lossy()),
pos,
Expand Down Expand Up @@ -1169,7 +1169,7 @@ pub fn subst<C: Cache>(
| v @ Term::ForeignId(_)
| v @ Term::SealingKey(_)
| v @ Term::Enum(_)
| v @ Term::Import(_)
| v @ Term::Import{..}
| v @ Term::ResolvedImport(_)
// We could recurse here, because types can contain terms which would then be subject to
// substitution. Not recursing should be fine, though, because a type in term position
Expand Down
12 changes: 8 additions & 4 deletions core/src/eval/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@ fn imports() {
.add_source(String::from("bad"), String::from("^$*/.23ab 0°@"));
vm.import_resolver_mut().add_source(
String::from("nested"),
String::from("let x = import \"two\" in x + 1"),
String::from("let x = import 'Nickel \"two\" in x + 1"),
);
vm.import_resolver_mut().add_source(
String::from("cycle"),
String::from("let x = import \"cycle_b\" in {a = 1, b = x.a}"),
String::from("let x = import 'Nickel \"cycle_b\" in {a = 1, b = x.a}"),
);
vm.import_resolver_mut().add_source(
String::from("cycle_b"),
String::from("let x = import \"cycle\" in {a = x.a}"),
String::from("let x = import 'Nickel \"cycle\" in {a = x.a}"),
);

fn mk_import<R>(
Expand All @@ -152,7 +152,11 @@ fn imports() {
R: ImportResolver,
{
resolve_imports(
mk_term::let_one_in(var, mk_term::import(import), body),
mk_term::let_one_in(
var,
mk_term::import(import, crate::cache::InputFormat::Nickel),
body,
),
vm.import_resolver_mut(),
)
.map(|resolve_result| resolve_result.transformed_term)
Expand Down
2 changes: 2 additions & 0 deletions core/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,6 @@ pub enum ParseError {
/// time, there are a set of expressions that can be excluded syntactically. Currently, it's
/// mostly constants.
InvalidContract(RawSpan),
/// Unrecognized explicit import format tag
InvalidImportFormat { span: RawSpan },
}
7 changes: 6 additions & 1 deletion core/src/parser/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,12 @@ UniTerm: UniTerm = {
<err: Error> => {
UniTerm::from(err)
},
"import" <s: StandardStaticString> => UniTerm::from(Term::Import(OsString::from(s))),
"import" <l: @L> <s: StandardStaticString> <r: @R> =>? {
Ok(UniTerm::from(mk_import_based_on_filename(s, mk_span(src_id, l, r))?))
},
"import" <l: @L> <t: EnumTag> <r: @R> <s: StandardStaticString> =>? {
Ok(UniTerm::from(mk_import_explicit(s, t, mk_span(src_id, l, r))?))
},
};

AnnotatedInfixExpr: UniTerm = {
Expand Down
4 changes: 2 additions & 2 deletions core/src/parser/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ fn ty_var_kind_mismatch() {
fn import() {
assert_eq!(
parse_without_pos("import \"file.ncl\""),
mk_term::import("file.ncl")
mk_term::import("file.ncl", crate::cache::InputFormat::Nickel)
);
assert_matches!(
parse("import \"file.ncl\" some args"),
Expand All @@ -564,7 +564,7 @@ fn import() {
assert_eq!(
parse_without_pos("(import \"file.ncl\") some args"),
mk_app!(
mk_term::import("file.ncl"),
mk_term::import("file.ncl", crate::cache::InputFormat::Nickel),
mk_term::var("some"),
mk_term::var("args")
)
Expand Down
25 changes: 25 additions & 0 deletions core/src/parser/utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Various helpers and companion code for the parser are put here to keep the grammar definition
//! uncluttered.
use indexmap::map::Entry;
use std::ffi::OsString;
use std::rc::Rc;
use std::{collections::HashSet, fmt::Debug};

Expand All @@ -10,6 +11,7 @@ use self::pattern::bindings::Bindings as _;

use super::error::ParseError;

use crate::cache::InputFormat;
use crate::{
combine::Combine,
eval::{
Expand Down Expand Up @@ -699,6 +701,29 @@ pub fn mk_fun(pat: Pattern, body: RichTerm) -> Term {
}
}

pub fn mk_import_based_on_filename(path: String, _span: RawSpan) -> Result<Term, ParseError> {
let path = OsString::from(path);
let format: Option<InputFormat> =
InputFormat::from_path(std::path::Path::new(path.as_os_str()));

// Fall back to InputFormat::Nickel in case of unknown filename extension for backwards compatiblilty.
let format = format.unwrap_or_default();

Ok(Term::Import { path, format })
}

pub fn mk_import_explicit(
path: String,
format: LocIdent,
span: RawSpan,
) -> Result<Term, ParseError> {
let path = OsString::from(path);
let Some(format) = InputFormat::from_tag(format.label()) else {
return Err(ParseError::InvalidImportFormat { span });
};
Ok(Term::Import { path, format })
}

/// Determine the minimal level of indentation of a multi-line string.
///
/// The result is determined by computing the minimum indentation level among all lines, where the
Expand Down
Loading

0 comments on commit c50647d

Please sign in to comment.