Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete -naive flag and disallow lookup actions in rules #461

Open
wants to merge 50 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
74999fb
Get rid of semi-naive flag
FTRobbin Oct 23, 2024
3e55b0d
Global lookup tests
FTRobbin Oct 30, 2024
82994eb
Merge branch 'main' of github.com:egraphs-good/egglog into haobinni-0904
FTRobbin Oct 30, 2024
dc69b30
Add fail corner case to remove_global
FTRobbin Oct 30, 2024
dee21e8
Starting to rewrite tests
FTRobbin Nov 6, 2024
9e10fac
Merge branch 'main' of github.com:egraphs-good/egglog into haobinni-0904
FTRobbin Nov 6, 2024
35e8532
Rewrote all failed tests
FTRobbin Nov 6, 2024
a593155
Minor
FTRobbin Nov 6, 2024
dc42cd3
Minor
FTRobbin Nov 6, 2024
2573722
Revert "Rewrote all failed tests"
FTRobbin Nov 15, 2024
c484c92
Merge branch 'main' of github.com:egraphs-good/egglog into haobinni-0904
FTRobbin Nov 15, 2024
204dd5c
New typechecking pass forbidding lookups
FTRobbin Nov 22, 2024
6c8cfbc
Merge branch 'main' of github.com:egraphs-good/egglog into haobinni-0904
FTRobbin Nov 22, 2024
3ff74c7
Fix array.egg
FTRobbin Nov 22, 2024
3efe333
Fix combined_nested.egg
FTRobbin Nov 22, 2024
94a64c0
Fix cykjson.egg
FTRobbin Nov 22, 2024
a18063e
Fix cyk.egg
FTRobbin Nov 22, 2024
2abdd10
Revert previous fixes to tests
FTRobbin Nov 22, 2024
a040ce9
Fixing eggcc-extraction.egg in progress
FTRobbin Nov 22, 2024
63fcbff
Fix eggcc-extraction.egg
FTRobbin Nov 22, 2024
4cdeebe
Fix fusion.egg
FTRobbin Nov 22, 2024
59334fb
FIx herbie.egg
FTRobbin Nov 22, 2024
de7a783
Fix herbie-tutorial.egg
FTRobbin Nov 22, 2024
dbbf4c8
Fix path.egg
FTRobbin Nov 22, 2024
4e34bc6
Fix integer_math.egg
FTRobbin Nov 22, 2024
bc92bad
Fix interval.egg
FTRobbin Nov 22, 2024
ce7294b
Fix list.egg
FTRobbin Nov 22, 2024
d8378a7
Fix math.egg
FTRobbin Nov 22, 2024
79781cc
Fix path-union.egg
FTRobbin Nov 22, 2024
30fdddd
Fix pathproof.egg
FTRobbin Nov 22, 2024
693ff70
Fix points-to.egg
FTRobbin Nov 22, 2024
b6a2466
Fix prims.egg
FTRobbin Nov 22, 2024
14aa00d
Fix python_arry_optimize.egg
FTRobbin Nov 22, 2024
a9c9969
Fix repro-desugar-143.egg
FTRobbin Nov 22, 2024
6c315eb
Fix python_array_optimize.egg again
FTRobbin Nov 22, 2024
97ecc71
Fix repro-querybug.egg
FTRobbin Nov 22, 2024
181cd97
Fix repro-unsound.egg
FTRobbin Nov 23, 2024
0c0af7a
Fix rw-analysis.egg
FTRobbin Nov 23, 2024
755cf92
Fix schedule-demo.egg
FTRobbin Nov 23, 2024
078feb0
Fix stratified.egg
FTRobbin Nov 23, 2024
34a094c
Fix stresstest_large_expr.egg
FTRobbin Nov 23, 2024
bb09c17
Fix test-combined.egg
FTRobbin Nov 23, 2024
ead38d8
Fix test-combined-steps.egg
FTRobbin Nov 23, 2024
a3a7f3c
Fix tricky-type-checking.egg
FTRobbin Nov 23, 2024
645d3e3
Fix typeinfer.egg
FTRobbin Nov 23, 2024
b3768b3
Fix until.egg
FTRobbin Nov 23, 2024
5b95c79
Add negative cases for new typechecking pass
FTRobbin Nov 23, 2024
fd20ebf
Add more negative cases
FTRobbin Nov 23, 2024
ac48f48
Add negative and positive cases for rewrite
FTRobbin Nov 23, 2024
8a75e7e
Minor
FTRobbin Nov 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ make all
## Usage

```
cargo run [-f fact-path] [-naive] [--to-json] [--to-dot] [--to-svg] <files.egg>
cargo run [-f fact-path] [--to-json] [--to-dot] [--to-svg] <files.egg>
```

or just
Expand Down
74 changes: 4 additions & 70 deletions src/ast/desugar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ use crate::*;
pub(crate) fn desugar_program(
program: Vec<Command>,
symbol_gen: &mut SymbolGen,
seminaive_transform: bool,
) -> Result<Vec<NCommand>, Error> {
let mut res = vec![];
for command in program {
let desugared = desugar_command(command, symbol_gen, seminaive_transform)?;
let desugared = desugar_command(command, symbol_gen)?;
res.extend(desugared);
}
Ok(res)
Expand All @@ -23,7 +22,6 @@ pub(crate) fn desugar_program(
pub(crate) fn desugar_command(
command: Command,
symbol_gen: &mut SymbolGen,
seminaive_transform: bool,
) -> Result<Vec<NCommand>, Error> {
let res = match command {
Command::SetOption { name, value } => {
Expand Down Expand Up @@ -101,11 +99,7 @@ pub(crate) fn desugar_command(
Command::Include(span, file) => {
let s = std::fs::read_to_string(&file)
.unwrap_or_else(|_| panic!("{span} Failed to read file {file}"));
return desugar_program(
parse_program(Some(file), &s)?,
symbol_gen,
seminaive_transform,
);
return desugar_program(parse_program(Some(file), &s)?, symbol_gen);
}
Command::Rule {
ruleset,
Expand All @@ -116,22 +110,12 @@ pub(crate) fn desugar_command(
name = rule.to_string().replace('\"', "'").into();
}

let mut result = vec![NCommand::NormRule {
let result = vec![NCommand::NormRule {
ruleset,
name,
rule: rule.clone(),
}];

if seminaive_transform {
if let Some(new_rule) = add_semi_naive_rule(symbol_gen, rule) {
result.push(NCommand::NormRule {
ruleset,
name,
rule: new_rule,
});
}
}

result
}
Command::Sort(span, sort, option) => vec![NCommand::Sort(span, sort, option)],
Expand Down Expand Up @@ -217,7 +201,7 @@ pub(crate) fn desugar_command(
vec![NCommand::Pop(span, num)]
}
Command::Fail(span, cmd) => {
let mut desugared = desugar_command(*cmd, symbol_gen, seminaive_transform)?;
let mut desugared = desugar_command(*cmd, symbol_gen)?;

let last = desugared.pop().unwrap();
desugared.push(NCommand::Fail(span, Box::new(last)));
Expand Down Expand Up @@ -320,55 +304,6 @@ fn desugar_birewrite(ruleset: Symbol, name: Symbol, rewrite: &Rewrite) -> Vec<NC
.collect()
}

// TODO(yz): we can delete this code once we enforce that all rule bodies cannot read the database (except EqSort).
fn add_semi_naive_rule(symbol_gen: &mut SymbolGen, rule: Rule) -> Option<Rule> {
let mut new_rule = rule;
// Whenever an Let(_, expr@Call(...)) or Set(_, expr@Call(...)) is present in action,
// an additional seminaive rule should be created.
// Moreover, for each such expr, expr and all variable definitions that it relies on should be moved to trigger.
let mut new_head_atoms = vec![];
let mut add_new_rule = false;

let mut var_set = HashSet::default();
for head_slice in new_rule.head.0.iter_mut().rev() {
match head_slice {
Action::Set(span, _, _, expr) => {
var_set.extend(expr.vars());
if let Expr::Call(..) = expr {
add_new_rule = true;

let fresh_symbol = symbol_gen.fresh(&"desugar_snrule".into());
let fresh_var = Expr::Var(span.clone(), fresh_symbol);
let expr = std::mem::replace(expr, fresh_var.clone());
new_head_atoms.push(Fact::Eq(span.clone(), vec![fresh_var, expr]));
};
}
Action::Let(span, symbol, expr) if var_set.contains(symbol) => {
var_set.extend(expr.vars());
if let Expr::Call(..) = expr {
add_new_rule = true;

let var = Expr::Var(span.clone(), *symbol);
new_head_atoms.push(Fact::Eq(span.clone(), vec![var, expr.clone()]));
}
}
_ => (),
}
}

if add_new_rule {
new_rule.body.extend(new_head_atoms.into_iter().rev());
// remove all let action
new_rule.head.0.retain_mut(
|action| !matches!(action, Action::Let(_ann, var, _) if var_set.contains(var)),
);
log::debug!("Added a semi-naive desugared rule:\n{}", new_rule);
Some(new_rule)
} else {
None
}
}

fn desugar_simplify(
expr: &Expr,
schedule: &Schedule,
Expand All @@ -391,7 +326,6 @@ fn desugar_simplify(
expr: Expr::Var(span.clone(), lhs),
},
symbol_gen,
false,
)
.unwrap(),
);
Expand Down
9 changes: 9 additions & 0 deletions src/ast/remove_globals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ impl<'a> GlobalRemover<'a> {
rule: new_rule,
}]
}
//Handle the corner case where a global command is wrap in (fail )
GenericNCommand::Fail(span, cmd) => {
let mut removed = self.remove_globals_cmd(*cmd);
let last = removed.pop().unwrap();
let boxed_last = Box::new(last);
let new_command = GenericNCommand::Fail(span, boxed_last);
removed.push(new_command);
removed
}
_ => vec![cmd.visit_exprs(&mut replace_global_vars)],
}
}
Expand Down
31 changes: 13 additions & 18 deletions src/gj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -748,27 +748,22 @@ impl EGraph {
}
}

let do_seminaive = self.seminaive;
// for the later atoms, we consider everything
let mut timestamp_ranges =
vec![0..u32::MAX; cq.query.funcs().collect::<Vec<_>>().len()];
if do_seminaive {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we still want to keep the -naive flag as well as the code here, so the user can still do naive evaluation (useful for debugging, also have a different semantics than semi-naive for "unsafe" egglog).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree that we should probably keep naive evaluation

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should keep it. It is unhelpful and adds complexity to the later passes as they need to support the naive semantics correctly. I am also against keeping it as a use-at-your-own-risk feature.

For Egglog users, if you don't use delete, semi-naive and naive are indistinguishable, so it is unhelpful for debugging. If you use delete, then you care much about performance, and there's no point in using naive. Even when you debug with unsafe features, you should probably debug the semi-naive case instead because that's what you want.

For Egglog developers, I see some value in being a sanity check for ensuring semi-naive is implemented correctly. But we are not doing this now, and it can also be done through stronger end-to-end test cases.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was convincing to me. I've never personally used it before
@yihozhang what do you think?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What complexity does this add to later passes? I thought the only difference between seminaive and naive is that in the seminaive case, we split the original query into many small queries depending on the timestamps (i.e., what this code snippet does).

I strongly recommend that we keep the naive evaluation. We can view semi-naive as an optimization of the naive evaluation, and this optimization is not always semantic-preserving, when given bizarre programs that violate certain assumptions. Examples include

  • rules that use extract / user-defined primitives
  • rules where the merge function is not associative or idempotent

I'm also not confident that our semi-naive is implemented correctly- do we really update timestamp every time we update the table? I just looked at table.rs and it seems we don't update the timestamp for at least get_mut. The naive evaluation serves as a ground truth for this purpose. Personally, when I am debugging a primitive I wrote, the first thing I do is to disable semi-naive evaluation.

Copy link
Collaborator Author

@FTRobbin FTRobbin Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we keep the naive flag, either we need to split the latter passes into two, which is unlikely, or each piece of downstream code must support both naive and semi-naive. I am skeptical about the claim that semi-naive code would just work for naive. For one thing, I don't see how semi-naive can be implemented as pure syntactic rewrites. As you pointed out, something more needs to happen to the timestamps in the semi-naive case. And yet, the naive flag is not used anywhere else in the codebase.

There are cases where the two give different semantics. However, the naive semantics is not more helpful to the users in those cases because they still need semi-naive to work in the end.

For your last point: Firstly, you still need to debug your new primitive for semi-naive. Secondly, I will only trust naive evaluation as a ground truth if it is well supported with a clear separation between the two semantics. Relying on your program to be tested to produce the test output is a terrible idea to me.

However, I do think this discussion raised a significant concern about the correctness of Egglog. We should investigate the issue.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conclusion: Keep

  • Not comprising the comfort of -naive for a smaller core
  • Too much effort to actually implement -naive, we settle for the timestamp hack
  • Reconsider when merging the new backend

for (atom_i, _atom) in cq.query.funcs().enumerate() {
timestamp_ranges[atom_i] = timestamp..u32::MAX;

self.gj_for_atom(
Some(atom_i),
&timestamp_ranges,
cq,
include_subsumed,
&mut f,
);
// now we can fix this atom to be "old stuff" only
// range is half-open; timestamp is excluded
timestamp_ranges[atom_i] = 0..timestamp;
}
} else {
self.gj_for_atom(None, &timestamp_ranges, cq, include_subsumed, &mut f);
for (atom_i, _atom) in cq.query.funcs().enumerate() {
timestamp_ranges[atom_i] = timestamp..u32::MAX;

self.gj_for_atom(
Some(atom_i),
&timestamp_ranges,
cq,
include_subsumed,
&mut f,
);
// now we can fix this atom to be "old stuff" only
// range is half-open; timestamp is excluded
timestamp_ranges[atom_i] = 0..timestamp;
}
} else if let Some((mut ctx, program, _)) = Context::new(self, cq, &[], include_subsumed) {
let mut meausrements = HashMap::<usize, Vec<usize>>::default();
Expand Down
5 changes: 1 addition & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,6 @@ pub struct EGraph {
timestamp: u32,
pub run_mode: RunMode,
pub fact_directory: Option<PathBuf>,
pub seminaive: bool,
type_info: TypeInfo,
extract_report: Option<ExtractReport>,
/// The run report for the most recent run of a schedule.
Expand All @@ -459,7 +458,6 @@ impl Default for EGraph {
run_mode: RunMode::Normal,
interactive_mode: false,
fact_directory: None,
seminaive: true,
extract_report: None,
recent_run_report: None,
overall_run_report: Default::default(),
Expand Down Expand Up @@ -1408,8 +1406,7 @@ impl EGraph {
}

fn process_command(&mut self, command: Command) -> Result<Vec<ResolvedNCommand>, Error> {
let program =
desugar::desugar_program(vec![command], &mut self.symbol_gen, self.seminaive)?;
let program = desugar::desugar_program(vec![command], &mut self.symbol_gen)?;

let program = self
.type_info
Expand Down
3 changes: 0 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ struct Args {
#[clap(short = 'F', long)]
fact_directory: Option<PathBuf>,
#[clap(long)]
naive: bool,
#[clap(long)]
desugar: bool,
#[clap(long)]
resugar: bool,
Expand Down Expand Up @@ -101,7 +99,6 @@ fn main() {
let mut egraph = EGraph::default();
egraph.set_reserved_symbol(args.reserved_symbol.clone().into());
egraph.fact_directory.clone_from(&args.fact_directory);
egraph.seminaive = !args.naive;
egraph.run_mode = args.show;
egraph
};
Expand Down
61 changes: 61 additions & 0 deletions src/typechecking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,72 @@ impl TypeInfo {
let body: Vec<ResolvedFact> = assignment.annotate_facts(&mapped_query, self);
let actions: ResolvedActions = assignment.annotate_actions(&mapped_action, self)?;

Self::check_lookup_actions(&actions)?;

Ok(ResolvedRule {
span: span.clone(),
body,
head: actions,
})
}

fn check_lookup_expr(expr: &GenericExpr<ResolvedCall, ResolvedVar>) -> Result<(), TypeError> {
match expr {
GenericExpr::Call(span, head, args) => {
match head {
ResolvedCall::Func(t) => {
// Only allowed to lookup constructor
if !t.is_datatype {
Err(TypeError::LookupInRuleDisallowed(
head.to_symbol(),
span.clone(),
))
} else {
Ok(())
}
}
ResolvedCall::Primitive(_) => Ok(()),
}?;
for arg in args.iter() {
Self::check_lookup_expr(arg)?
}
Ok(())
}
_ => Ok(()),
}
}

fn check_lookup_actions(actions: &ResolvedActions) -> Result<(), TypeError> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could use map_exprs and/or visit_exprs here to make this check easier

for action in actions.iter() {
match action {
GenericAction::Let(_, _, rhs) => Self::check_lookup_expr(rhs),
GenericAction::Set(_, _, args, rhs) => {
for arg in args.iter() {
Self::check_lookup_expr(arg)?
}
Self::check_lookup_expr(rhs)
}
GenericAction::Union(_, lhs, rhs) => {
Self::check_lookup_expr(lhs)?;
Self::check_lookup_expr(rhs)
}
GenericAction::Change(_, _, _, args) => {
for arg in args.iter() {
Self::check_lookup_expr(arg)?
}
Ok(())
}
GenericAction::Extract(_, expr, variants) => {
Self::check_lookup_expr(expr)?;
Self::check_lookup_expr(variants)
}
GenericAction::Panic(..) => Ok(()),
GenericAction::Expr(_, expr) => Self::check_lookup_expr(expr),
}?
}
Ok(())
}

fn typecheck_facts(
&self,
symbol_gen: &mut SymbolGen,
Expand Down Expand Up @@ -516,6 +575,8 @@ pub enum TypeError {
InferenceFailure(Expr),
#[error("{1}\nVariable {0} was already defined")]
AlreadyDefined(Symbol, Span),
#[error("{1}\nValue lookup of non-constructor function {0} in rule is disallowed.")]
LookupInRuleDisallowed(Symbol, Span),
#[error("All alternative definitions considered failed\n{}", .0.iter().map(|e| format!(" {e}\n")).collect::<Vec<_>>().join(""))]
AllAlternativeFailed(Vec<TypeError>),
}
Expand Down
14 changes: 7 additions & 7 deletions tests/array.egg
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,21 @@
(relation neq (Math Math))

(rule ((neq x y))
((neq y x)))
((set (neq y x) ())))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, now relations have to be set. My opinion is that we shouldn't make users use set for relations.

I think the long-term plan is for us to desugar relations to be constructors so people don't have to do this. You could either implement that plan in this PR, or desugar relations to a set for now and do that in another PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I'll implement the desugaring in this PR. My impression is that it is still up to discussion what it should be desugared into. Performance overhead aside, desugaring into a constructor might not be ideal because that makes it more expressive than needed. For instance, now you can union two edges in the relation or have a relation as the codomain of a function.

Copy link
Member

@oflatt oflatt Nov 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point- we would have to disallow unioning two relation terms.
You can't have a relation as the codomain of a function because the sort would be fresh (no way to get a handle on it)

Relation desugaring aside, I don't think we want to make this breaking change when we plan to fix it right away. I guess that means this PR is blocked by solving the relation issue

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make functions whose output is Unit a special case: Such functions have default value () and an implicit merge function.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that would work. I think the plan is to get rid of default values altogether. Desugared or not.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant getting rid of the default field/keyword, but handle unit specially (similar to how EqSort is handled specially).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conclusions:

  • Conceptually: 3 function subtypes
    • Constructor | Relation | Custom
    • Operates differently
  • Syntax
    • Keeping the status quo
      • Because there is not enough reason to change it
      • The decision is final for the June vision
    • Adding constructor keyword
      • Only relation if declared as relation
      • Only constructor if declared as constructor
  • Semantics/implementation
    • Everything still uses the same function data structure under the hood
    • Only relation and constructor are allowed on RHS
      • It adds the relation/creates the term
    • Union requires both arguments to be the same EqSort
    • Set requires the head to be a custom function


(rule ((neq x x))
((panic "query (neq x x) found something equal to itself")))


; injectivity rules take not equal to not equal.
(rule ((neq x y) (= (add x z) e))
((neq (add x z) (add y z))))
((set (neq (add x z) (add y z)) ())))
(rule ((= (add x (Num i)) e) (!= i 0))
((neq e x)))
((set (neq e x) ())))


(rule ((= (Num a) n1) (= (Num b) n2) (!= a b))
((neq n1 n2)))
((set (neq n1 n2) ())))

; select gets from store
(rewrite (select (store mem i e) i) e)
Expand All @@ -59,9 +59,9 @@
(let r3 (Var "r3"))
(let mem1 (AVar "mem1"))

(neq r1 r2)
(neq r2 r3)
(neq r1 r3)
(set (neq r1 r2) ())
(set (neq r2 r3) ())
(set (neq r1 r3) ())
(let test1 (select (store mem1 r1 (Num 42)) r1))
(let test2 (select (store mem1 r1 (Num 42)) (add r1 (Num 17))))
(let test3 (select (store (store mem1 (add r1 r2) (Num 1)) (add r2 r1) (Num 2)) (add r1 r3)))
Expand Down
Loading
Loading