From 7dd796e9c27f2d55cab8b3408da2c3f6bc448156 Mon Sep 17 00:00:00 2001 From: Asdine El Hrychy Date: Mon, 25 Jan 2021 10:20:51 +0400 Subject: [PATCH] Simplify planner --- db.go | 25 +- engine/enginetest/testing.go | 41 ++- example_test.go | 59 ----- sql/planner/explain.go | 36 ++- sql/planner/explain_test.go | 38 +-- sql/planner/optimizer.go | 414 +++++++++++++++++------------ sql/planner/optimizer_test.go | 425 ++++++++++------------------- sql/planner/tree.go | 485 ---------------------------------- testutil/stream.go | 1 + 9 files changed, 455 insertions(+), 1069 deletions(-) delete mode 100644 sql/planner/tree.go create mode 100644 testutil/stream.go diff --git a/db.go b/db.go index 02072edbd..f8d010903 100644 --- a/db.go +++ b/db.go @@ -7,6 +7,7 @@ import ( "github.com/genjidb/genji/document" "github.com/genjidb/genji/sql/parser" "github.com/genjidb/genji/sql/query" + "github.com/genjidb/genji/stream" ) // DB represents a collection of tables stored in the underlying engine. @@ -101,17 +102,25 @@ func (db *DB) QueryDocument(q string, args ...interface{}) (document.Document, e } defer res.Close() - r, err := res.First() + return scanDocument(res) +} + +func scanDocument(res *query.Result) (document.Document, error) { + var d document.Document + err := res.Iterate(func(doc document.Document) error { + d = doc + return stream.ErrStreamClosed + }) if err != nil { return nil, err } - if r == nil { + if d == nil { return nil, database.ErrDocumentNotFound } var fb document.FieldBuffer - err = fb.ScanDocument(r) + err = fb.Copy(d) if err != nil { return nil, err } @@ -147,15 +156,7 @@ func (tx *Tx) QueryDocument(q string, args ...interface{}) (document.Document, e } defer res.Close() - r, err := res.First() - if err != nil { - return nil, err - } - if r == nil { - return nil, database.ErrDocumentNotFound - } - - return r, nil + return scanDocument(res) } // Exec a query against the database within tx and without returning the result. diff --git a/engine/enginetest/testing.go b/engine/enginetest/testing.go index 3de20e6ab..c22e3883f 100644 --- a/engine/enginetest/testing.go +++ b/engine/enginetest/testing.go @@ -1180,17 +1180,16 @@ func TestQueries(t *testing.T, builder Builder) { db, err := genji.New(context.Background(), ng) require.NoError(t, err) - st, err := db.Query(` + d, err := db.QueryDocument(` CREATE TABLE test; INSERT INTO test (a) VALUES (1), (2), (3), (4); - SELECT * FROM test; + SELECT COUNT(*) FROM test; `) require.NoError(t, err) - n, err := st.Count() - require.NoError(t, err) - require.Equal(t, 4, n) - err = st.Close() + var count int + err = document.Scan(d, &count) require.NoError(t, err) + require.Equal(t, 4, count) t.Run("ORDER BY", func(t *testing.T) { st, err := db.Query("SELECT * FROM test ORDER BY a DESC") @@ -1273,15 +1272,15 @@ func TestQueries(t *testing.T, builder Builder) { }) require.NoError(t, err) - st, err := db.Query(` + d, err := db.QueryDocument(` DELETE FROM test WHERE a > 2; - SELECT * FROM test; + SELECT COUNT(*) FROM test; `) require.NoError(t, err) - defer st.Close() - n, err := st.Count() + var count int + err = document.Scan(d, &count) require.NoError(t, err) - require.Equal(t, 2, n) + require.Equal(t, 2, count) }) } @@ -1298,16 +1297,16 @@ func TestQueriesSameTransaction(t *testing.T, builder Builder) { require.NoError(t, err) err = db.Update(func(tx *genji.Tx) error { - st, err := tx.Query(` + d, err := tx.QueryDocument(` CREATE TABLE test; INSERT INTO test (a) VALUES (1), (2), (3), (4); - SELECT * FROM test; + SELECT COUNT(*) FROM test; `) require.NoError(t, err) - defer st.Close() - n, err := st.Count() + var count int + err = document.Scan(d, &count) require.NoError(t, err) - require.Equal(t, 4, n) + require.Equal(t, 4, count) return nil }) require.NoError(t, err) @@ -1373,17 +1372,17 @@ func TestQueriesSameTransaction(t *testing.T, builder Builder) { require.NoError(t, err) err = db.Update(func(tx *genji.Tx) error { - st, err := tx.Query(` + d, err := tx.QueryDocument(` CREATE TABLE test; INSERT INTO test (a) VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); DELETE FROM test WHERE a > 2; - SELECT * FROM test; + SELECT COUNT(*) FROM test; `) require.NoError(t, err) - defer st.Close() - n, err := st.Count() + var count int + document.Scan(d, &count) require.NoError(t, err) - require.Equal(t, 2, n) + require.Equal(t, 2, count) return nil }) require.NoError(t, err) diff --git a/example_test.go b/example_test.go index f5f114d97..32d69f60b 100644 --- a/example_test.go +++ b/example_test.go @@ -1,9 +1,7 @@ package genji_test import ( - "encoding/json" "fmt" - "os" "github.com/genjidb/genji" "github.com/genjidb/genji/document" @@ -81,65 +79,8 @@ func Example() { panic(err) } - // Count results - count, err := stream.Count() - if err != nil { - panic(err) - } - fmt.Println("Count:", count) - - // Get first document from the results - d, err := stream.First() - if err != nil { - panic(err) - } - - // Scan into a struct - var u User - err = document.StructScan(d, &u) - if err != nil { - panic(err) - } - - enc := json.NewEncoder(os.Stdout) - - // Apply some manual transformations - err = stream. - // Filter all even ids - Filter(func(d document.Document) (bool, error) { - v, err := d.GetByField("id") - if err != nil { - return false, err - } - return int64(v.V.(float64))%2 == 0, err - }). - // Enrich the documents with a new field - Map(func(d document.Document) (document.Document, error) { - var fb document.FieldBuffer - - err := fb.ScanDocument(d) - if err != nil { - return nil, err - } - - fb.Add("group", document.NewTextValue("admin")) - return &fb, nil - }). - // Iterate on them - Iterate(func(d document.Document) error { - return enc.Encode(d) - }) - - if err != nil { - panic(err) - } - // Output: // {10 foo 15 { }} // {12 bar 16 {Lyon 69001}} // {2 bat 0 { }} - // Count: 3 - // {"id":10,"name":"foo","age":15,"group":"admin"} - // {"id":12,"name":"bar","age":16,"address":{"city":"Lyon","zipcode":"69001"},"group":"admin"} - // {"id":2,"name":"bat","age":0,"address":{"city":"","zipcode":""},"group":"admin"} } diff --git a/sql/planner/explain.go b/sql/planner/explain.go index e783370bf..db10aac15 100644 --- a/sql/planner/explain.go +++ b/sql/planner/explain.go @@ -4,9 +4,9 @@ import ( "errors" "github.com/genjidb/genji/database" - "github.com/genjidb/genji/document" "github.com/genjidb/genji/sql/query" "github.com/genjidb/genji/sql/query/expr" + "github.com/genjidb/genji/stream" ) // ExplainStmt is a query.Statement that @@ -17,35 +17,31 @@ type ExplainStmt struct { } // Run analyses the inner statement and displays its execution plan. -// If the statement is a tree, Bind and Optimize will be called prior to +// If the statement is a stream, Optimize will be called prior to // displaying all the operations. -// Explain currently only works on SELECT, UPDATE and DELETE statements. +// Explain currently only works on SELECT, UPDATE, INSERT and DELETE statements. func (s *ExplainStmt) Run(tx *database.Transaction, params []expr.Param) (query.Result, error) { switch t := s.Statement.(type) { - case *Tree: - err := Bind(t, tx, params) + case *stream.Statement: + s, err := Optimize(t.Stream, tx) if err != nil { return query.Result{}, err } - t, err = Optimize(t) - if err != nil { - return query.Result{}, err + newStatement := stream.Statement{ + Stream: &stream.Stream{ + Op: stream.Project( + &expr.NamedExpr{ + ExprName: "plan", + Expr: expr.TextValue(s.String()), + }), + }, + ReadOnly: true, } - - return s.createResult(t.String()) + return newStatement.Run(tx, params) } - return query.Result{}, errors.New("EXPLAIN only works on SELECT, UPDATE AND DELETE statements") -} - -func (s *ExplainStmt) createResult(text string) (query.Result, error) { - return query.Result{ - Stream: document.NewStream( - document.NewIterator( - document.NewFieldBuffer(). - Add("plan", document.NewTextValue(text)))), - }, nil + return query.Result{}, errors.New("EXPLAIN only works on INSERT, SELECT, UPDATE AND DELETE statements") } // IsReadOnly indicates that this statement doesn't write anything into diff --git a/sql/planner/explain_test.go b/sql/planner/explain_test.go index 059fdfa76..c71fac720 100644 --- a/sql/planner/explain_test.go +++ b/sql/planner/explain_test.go @@ -13,24 +13,26 @@ func TestExplainStmt(t *testing.T) { fails bool expected string }{ - {"EXPLAIN SELECT 1 + 1", false, `"∏(1 + 1)"`}, - {"EXPLAIN SELECT * FROM noexist", true, ``}, - {"EXPLAIN SELECT * FROM test", false, `"Table(test) -> ∏(*)"`}, - {"EXPLAIN SELECT a + 1 FROM test", false, `"Table(test) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10", false, `"Table(test) -> σ(cond: c > 10) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 AND d > 20", false, `"Table(test) -> σ(cond: d > 20) -> σ(cond: c > 10) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 OR d > 20", false, `"Table(test) -> σ(cond: c > 10 OR d > 20) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c IN [1 + 1, 2 + 2]", false, `"Table(test) -> σ(cond: c IN [2, 4]) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10", false, `"Index(idx_a) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10 AND b > 20 AND c > 30", false, `"Index(idx_b) -> σ(cond: c > 30) -> σ(cond: a > 10) -> ∏(a + 1)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"Table(test) -> σ(cond: c > 30) -> ∏(a + 1) -> Sort(a DESC) -> Offset(20) -> Limit(10)"`}, - {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 GROUP BY a + 1 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"Table(test) -> σ(cond: c > 30) -> Group(a + 1) -> Aggregate(a + 1) -> ∏(a + 1) -> Sort(a DESC) -> Offset(20) -> Limit(10)"`}, - {"EXPLAIN UPDATE test SET a = 10", false, `"Table(test) -> Set(a = 10) -> Replace(test)"`}, - {"EXPLAIN UPDATE test SET a = 10 WHERE c > 10", false, `"Table(test) -> σ(cond: c > 10) -> Set(a = 10) -> Replace(test)"`}, - {"EXPLAIN UPDATE test SET a = 10 WHERE a > 10", false, `"Index(idx_a) -> Set(a = 10) -> Replace(test)"`}, - {"EXPLAIN DELETE FROM test", false, `"Table(test) -> Delete(test)"`}, - {"EXPLAIN DELETE FROM test WHERE c > 10", false, `"Table(test) -> σ(cond: c > 10) -> Delete(test)"`}, - {"EXPLAIN DELETE FROM test WHERE a > 10", false, `"Index(idx_a) -> Delete(test)"`}, + // {"EXPLAIN SELECT 1 + 1", false, `"project(1 + 1)"`}, + // {"EXPLAIN SELECT * FROM noexist", true, ``}, + // {"EXPLAIN SELECT * FROM test", false, `"seqScan(test) | project(*)"`}, + // {"EXPLAIN SELECT a + 1 FROM test", false, `"seqScan(test) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10", false, `"seqScan(test) | filter(c > 10) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 AND d > 20", false, `"seqScan(test) | filter(c > 10) | filter(d > 20) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 10 OR d > 20", false, `"seqScan(test) | filter(c > 10 OR d > 20) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c IN [1 + 1, 2 + 2]", false, `"seqScan(test) | filter(c IN [2, 4]) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10", false, `"indexScan(\"idx_a\", [10, -1, true]) | project(a + 1)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE a > 10 AND b > 20 AND c > 30", false, `"indexScan(\"idx_b\", [20, -1, true]) | filter(a > 10) | filter(c > 30) | project(a + 1)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY d LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sort(d) | skip(20) | take(10)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY d DESC LIMIT 10 OFFSET 20", false, `"seqScan(test) | filter(c > 30) | project(a + 1) | sortReverse(d) | skip(20) | take(10)"`}, + {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"indexScanReverse(\"idx_a\") | filter(c > 30) | project(a + 1) | skip(20) | take(10)"`}, + // {"EXPLAIN SELECT a + 1 FROM test WHERE c > 30 GROUP BY a + 1 ORDER BY a DESC LIMIT 10 OFFSET 20", false, `"Table(test) -> σ(cond: c > 30) -> Group(a + 1) -> Aggregate(a + 1) -> ∏(a + 1) -> Sort(a DESC) -> Offset(20) -> Limit(10)"`}, + // {"EXPLAIN UPDATE test SET a = 10", false, `"Table(test) -> Set(a = 10) -> Replace(test)"`}, + // {"EXPLAIN UPDATE test SET a = 10 WHERE c > 10", false, `"Table(test) -> σ(cond: c > 10) -> Set(a = 10) -> Replace(test)"`}, + // {"EXPLAIN UPDATE test SET a = 10 WHERE a > 10", false, `"Index(idx_a) -> Set(a = 10) -> Replace(test)"`}, + // {"EXPLAIN DELETE FROM test", false, `"Table(test) -> Delete(test)"`}, + // {"EXPLAIN DELETE FROM test WHERE c > 10", false, `"Table(test) -> σ(cond: c > 10) -> Delete(test)"`}, + // {"EXPLAIN DELETE FROM test WHERE a > 10", false, `"Index(idx_a) -> Delete(test)"`}, } for _, test := range tests { diff --git a/sql/planner/optimizer.go b/sql/planner/optimizer.go index c68ea2945..7d7b1346b 100644 --- a/sql/planner/optimizer.go +++ b/sql/planner/optimizer.go @@ -1,56 +1,56 @@ package planner import ( + "fmt" + "github.com/genjidb/genji/database" "github.com/genjidb/genji/document" "github.com/genjidb/genji/sql/query/expr" - "github.com/genjidb/genji/sql/scanner" + "github.com/genjidb/genji/stream" ) -var optimizerRules = []func(t *Tree) (*Tree, error){ +var optimizerRules = []func(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error){ SplitANDConditionRule, PrecalculateExprRule, - RemoveUnnecessarySelectionNodesRule, - RemoveUnnecessaryDedupNodeRule, - UseIndexBasedOnSelectionNodeRule, + RemoveUnnecessaryFilterNodesRule, + RemoveUnnecessaryDistinctNodeRule, + UseIndexBasedOnFilterNodeRule, } // Optimize takes a tree, applies a list of optimization rules // and returns an optimized tree. // Depending on the rule, the tree may be modified in place or // replaced by a new one. -func Optimize(t *Tree) (*Tree, error) { +func Optimize(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { var err error for _, rule := range optimizerRules { - t, err = rule(t) + s, err = rule(s, tx) if err != nil { return nil, err } } - return t, nil + return s, nil } -// SplitANDConditionRule splits any selection node whose condition -// is one or more AND operators into one or more selection nodes. +// SplitANDConditionRule splits any filter node whose condition +// is one or more AND operators into one or more filter nodes. // The condition won't be split if the expression tree contains an OR // operation. // Example: // this: -// σ(a > 2 AND b != 3 AND c < 2) +// filter(a > 2 AND b != 3 AND c < 2) // becomes this: -// σ(a > 2) -// σ(b != 3) -// σ(c < 2) -func SplitANDConditionRule(t *Tree) (*Tree, error) { - n := t.Root - var prev Node +// filter(a > 2) +// filter(b != 3) +// filter(c < 2) +func SplitANDConditionRule(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { + n := s.Op for n != nil { - if n.Operation() == Selection { - sn := n.(*selectionNode) - cond := sn.cond + if f, ok := n.(*stream.FilterOperator); ok { + cond := f.E if cond != nil { // The AND operator has one of the lowest precedence, // only OR has a lower precedence, @@ -59,34 +59,24 @@ func SplitANDConditionRule(t *Tree) (*Tree, error) { if op, ok := cond.(expr.Operator); ok && expr.IsAndOperator(op) { exprs := splitANDExpr(cond) - cur := n.Left() - i := len(exprs) - 1 - var newNode Node - for i >= 0 { - newNode = NewSelectionNode(cur, exprs[i]) - err := newNode.Bind(sn.tx, sn.params) - if err != nil { - return nil, err - } - cur = newNode + cur := n.GetPrev() + s.Remove(n) - i-- + for _, e := range exprs { + cur = stream.InsertAfter(cur, stream.Filter(e)) } - if prev != nil { - prev.SetLeft(newNode) - } else { - t.Root = newNode + if s.Op == nil { + s.Op = cur } } } } - prev = n - n = n.Left() + n = n.GetPrev() } - return t, nil + return s, nil } // splitANDExpr takes an expression and splits it by AND operator. @@ -109,19 +99,23 @@ func splitANDExpr(cond expr.Expr) (exprs []expr.Expr) { // Examples: // 3 + 4 --> 7 // 3 + 1 > 10 - a --> 4 > 10 - a -func PrecalculateExprRule(t *Tree) (*Tree, error) { - n := t.Root +func PrecalculateExprRule(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { + n := s.Op for n != nil { - if n.Operation() == Selection { - sn := n.(*selectionNode) - sn.cond = precalculateExpr(sn.cond) + switch t := n.(type) { + case *stream.FilterOperator: + t.E = precalculateExpr(t.E) + case *stream.ProjectOperator: + for i, e := range t.Exprs { + t.Exprs[i] = precalculateExpr(e) + } } - n = n.Left() + n = n.GetPrev() } - return t, nil + return s, nil } // precalculateExpr is a recursive function that tries to precalculate @@ -210,84 +204,119 @@ func precalculateExpr(e expr.Expr) expr.Expr { return e } -// RemoveUnnecessarySelectionNodesRule removes any selection node whose +// RemoveUnnecessaryFilterNodesRule removes any selection node whose // condition is a constant expression that evaluates to a truthy value. // if it evaluates to a falsy value, it considers that the tree // will not stream any document, so it returns an empty tree. -func RemoveUnnecessarySelectionNodesRule(t *Tree) (*Tree, error) { - n := t.Root - var prev Node +func RemoveUnnecessaryFilterNodesRule(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { + n := s.Op for n != nil { - if n.Operation() == Selection { - sn := n.(*selectionNode) - if sn.cond != nil { - if lit, ok := sn.cond.(expr.LiteralValue); ok { + if f, ok := n.(*stream.FilterOperator); ok { + if f.E != nil { + switch t := f.E.(type) { + case expr.LiteralValue: + // Constant expression + // ex: WHERE 1 + // if the expr is falsy, we return an empty tree - ok, err := document.Value(lit).IsTruthy() + ok, err := document.Value(t).IsTruthy() if err != nil { return nil, err } if !ok { - return &Tree{}, nil + return &stream.Stream{}, nil } + // if the expr is truthy, we remove the node from the tree - if prev != nil { - prev.SetLeft(n.Left()) - } else { - t.Root = n.Left() + n = n.GetPrev() + s.Remove(n) + continue + case *expr.InOperator: + // IN operator with empty array + // ex: WHERE a IN [] + lv, ok := t.RightHand().(expr.LiteralValue) + if ok && lv.Type == document.ArrayValue { + l, err := document.ArrayLength(lv.V.(document.Array)) + if err != nil { + return nil, err + } + // if the array is empty, we return an empty tree + if l == 0 { + return &stream.Stream{}, nil + } } } - } } - prev = n - n = n.Left() + n = n.GetPrev() } - return t, nil + return s, nil } -// RemoveUnnecessaryDedupNodeRule removes any Dedup nodes +// RemoveUnnecessaryDistinctNodeRule removes any Dedup nodes // where projection is already unique. -func RemoveUnnecessaryDedupNodeRule(t *Tree) (*Tree, error) { - n := t.Root - var prev Node +func RemoveUnnecessaryDistinctNodeRule(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { + n := s.Op + var indexes map[string]database.Index + + // we assume that if we are reading from a table, the first + // operator of the stream has to be a SeqScanOperator + firstNode := s.First() + if firstNode == nil { + return s, nil + } + st, ok := firstNode.(*stream.SeqScanOperator) + if !ok { + return s, nil + } + + t, err := tx.GetTable(st.TableName) + if err != nil { + return nil, err + } + info, err := t.Info() + if err != nil { + return nil, err + } for n != nil { - if n.Operation() == Dedup { - d, ok := n.(*dedupNode) - if !ok { + if d, ok := n.(*stream.DistinctOperator); ok { + prev := d.GetPrev() + if prev == nil { continue } - - pn, ok := d.left.(*ProjectionNode) + pn, ok := prev.(*stream.ProjectOperator) if !ok { continue } - // if the projection is unique, we remove the node from the tree - if isProjectionUnique(d.indexes, pn) { - if prev != nil { - prev.SetLeft(n.Left()) - } else { - t.Root = n.Left() + if indexes == nil { + indexes, err = t.Indexes() + if err != nil { + return nil, err } } + + // if the projection is unique, we remove the node from the tree + if isProjectionUnique(indexes, pn, info.GetPrimaryKey()) { + n = prev + s.Remove(n) + continue + } } - prev = n - n = n.Left() + n = n.GetPrev() } - return t, nil + return s, nil } -func isProjectionUnique(indexes map[string]database.Index, pn *ProjectionNode) bool { - pk := pn.info.GetPrimaryKey() - for _, field := range pn.Expressions { - e, ok := field.(ProjectedExpr) +func isProjectionUnique(indexes map[string]database.Index, po *stream.ProjectOperator, pk *database.FieldConstraint) bool { + for _, field := range po.Exprs { + e, ok := field.(*expr.NamedExpr) if !ok { return false } @@ -311,59 +340,66 @@ func isProjectionUnique(indexes map[string]database.Index, pn *ProjectionNode) b return true } -// UseIndexBasedOnSelectionNodeRule scans the tree for the first selection node whose condition is an +// UseIndexBasedOnFilterNodeRule scans the tree for the first filter node whose condition is an // operator that satisfies the following criterias: -// - implements the indexIteratorOperator interface +// - is a comparison operator // - one of its operands is a path expression that is indexed // - the other operand is a literal value or a parameter // If found, it will replace the input node by an indexInputNode using this index. -func UseIndexBasedOnSelectionNodeRule(t *Tree) (*Tree, error) { - n := t.Root - var prev Node - var inputNode Node - - // first we lookup for the input node - for n != nil { - if n.Operation() == Input { - inputNode = n - break - } +func UseIndexBasedOnFilterNodeRule(s *stream.Stream, tx *database.Transaction) (*stream.Stream, error) { + n := s.Op - n = n.Left() + // first we lookup for the seq scan node. + // Here we will assume that at this point + // if there is one it has to be the + // first node of the stream. + firstNode := s.First() + if firstNode == nil { + return s, nil } - - if inputNode == nil { - return t, nil + fmt.Println("first node", firstNode) + st, ok := firstNode.(*stream.SeqScanOperator) + if !ok { + return s, nil + } + t, err := tx.GetTable(st.TableName) + if err != nil { + return nil, err } - // Here we will assume that at this point - // inputNodes can only be instances of tableInputNode. - inpn := inputNode.(*tableInputNode) + var indexes map[string]database.Index type candidate struct { - prevNode, nextNode Node - in *indexInputNode + filterOp *stream.FilterOperator + in *stream.IndexScanOperator + index *database.Index } var candidates []candidate - n = t.Root // look for all selection nodes that satisfy our requirements for n != nil { - if n.Operation() == Selection { - sn := n.(*selectionNode) - indexedNode := selectionNodeValidForIndex(sn, inpn.tableName, inpn.indexes) + if f, ok := n.(*stream.FilterOperator); ok { + if indexes == nil { + indexes, err = t.Indexes() + if err != nil { + return nil, err + } + } + indexedNode, idx, err := filterNodeValidForIndex(f, st.TableName, indexes) + if err != nil { + return nil, err + } if indexedNode != nil { candidates = append(candidates, candidate{ - prevNode: prev, - nextNode: n.Left(), + filterOp: f, in: indexedNode, + index: idx, }) } } - prev = n - n = n.Left() + n = n.GetPrev() } // determine which index is the most interesting and replace it in the tree. @@ -379,107 +415,159 @@ func UseIndexBasedOnSelectionNodeRule(t *Tree) (*Tree, error) { // if the candidate's related index is a unique index, // select it. - idx := candidate.in.index - if idx.Unique { + if candidate.index.Unique { selectedCandidate = &candidates[i] } } if selectedCandidate == nil { - return t, nil + return s, nil } - // we make sure the new IndexInputNode is bound - if err := selectedCandidate.in.Bind(inpn.tx, inpn.params); err != nil { - return nil, err - } + fmt.Println("1", s, "candidate", selectedCandidate.filterOp) - // we remove the selection node from the tree - if selectedCandidate.prevNode == nil { - t.Root = selectedCandidate.nextNode - } else { - selectedCandidate.prevNode.SetLeft(selectedCandidate.nextNode) - } + // remove the selection node from the tree + s.Remove(selectedCandidate.filterOp) - n = t.Root - prev = nil - // we lookup again for the input node and the node that is right before. - for n != nil { - if n.Operation() == Input { - break - } + fmt.Println("2", s) - prev = n - n = n.Left() - } + // we replace the seq scan node by the selected index scan node + s.InsertBefore(s.First(), selectedCandidate.in) - // we replace the table input node by the selected indexInputNode - if prev == nil { - t.Root = selectedCandidate.in - } else { - prev.SetLeft(selectedCandidate.in) - } + fmt.Println("3", s) + s.Remove(s.First().GetNext()) + fmt.Println(s) - return t, nil + return s, nil } -func selectionNodeValidForIndex(sn *selectionNode, tableName string, indexes map[string]database.Index) *indexInputNode { - if sn.cond == nil { - return nil +func filterNodeValidForIndex(sn *stream.FilterOperator, tableName string, indexes map[string]database.Index) (*stream.IndexScanOperator, *database.Index, error) { + fmt.Println(1) + + if sn.E == nil { + fmt.Println(2) + return nil, nil, nil } // the root of the condition must be an operator - op, ok := sn.cond.(expr.Operator) + op, ok := sn.E.(expr.Operator) if !ok { - return nil + fmt.Println(3) + + return nil, nil, nil } // determine if the operator can read from the index - iop, ok := op.(IndexIteratorOperator) - if !ok { - return nil + if !expr.OperatorIsIndexCompatible(op) { + fmt.Println(4) + + return nil, nil, nil } // determine if the operator can benefit from an index ok, path, e := opCanUseIndex(op) if !ok { - return nil + fmt.Println(5) + return nil, nil, nil } // analyse the other operand to make sure it's a literal or a param if !isLiteralOrParam(e) { - return nil + fmt.Println(6) + return nil, nil, nil } // now, we look if an index exists for that path idx, ok := indexes[path.String()] if !ok { - return nil + fmt.Println(7) + return nil, nil, nil + } + + var ranges []stream.Range + + switch op.(type) { + case *expr.EqOperator: + ranges = append(ranges, stream.Range{ + Min: e, + Exact: true, + }) + case *expr.GtOperator: + ranges = append(ranges, stream.Range{ + Min: e, + Exclusive: true, + }) + case *expr.GteOperator: + ranges = append(ranges, stream.Range{ + Min: e, + }) + case *expr.LtOperator: + ranges = append(ranges, stream.Range{ + Max: e, + Exclusive: true, + }) + case *expr.LteOperator: + ranges = append(ranges, stream.Range{ + Max: e, + }) + case *expr.InOperator: + // opCanUseIndex made sure e is an array. + a := e.(expr.LiteralValue).V.(document.Array) + err := a.Iterate(func(i int, value document.Value) error { + ranges = append(ranges, stream.Range{ + Min: expr.LiteralValue(value), + Exact: true, + }) + return nil + }) + if err != nil { + return nil, nil, err + } + default: + panic(fmt.Sprintf("unknown operator %#v", op)) } - in := NewIndexInputNode(tableName, idx.Opts.IndexName, iop, path, e, scanner.ASC).(*indexInputNode) - in.index = &idx + node := stream.IndexScan(idx.Opts.IndexName) + node.Ranges = ranges - return in + return node, &idx, nil } func opCanUseIndex(op expr.Operator) (bool, expr.Path, expr.Expr) { lf, leftIsField := op.LeftHand().(expr.Path) rf, rightIsField := op.RightHand().(expr.Path) + // Special case for IN operator: only left operand is valid for index usage + // valid: a IN [1, 2, 3] + // invalid: 1 IN a + if expr.IsInOperator(op) && leftIsField && !rightIsField { + fmt.Println("a") + rh := op.RightHand() + // The IN operator can use indexes only if the right hand side is an array with constants. + // At this point, we know that PrecalculateExprRule has converted any constant expression into + // actual values, so we can check if the right hand side is an array. + fmt.Printf("%#v\n", rh) + lv, ok := rh.(expr.LiteralValue) + if !ok || lv.Type != document.ArrayValue { + return false, nil, nil + } + + return true, lf, rh + } + // path OP expr if leftIsField && !rightIsField { + fmt.Println("b") return true, lf, op.RightHand() } // expr OP path - // Special case for IN operator: only left operand is valid for index usage - // valid: a IN [1, 2, 3] - // invalid: 1 IN a - if rightIsField && !leftIsField && !expr.IsInOperator(op) { + if rightIsField && !leftIsField { + fmt.Println("c") return true, rf, op.LeftHand() } + fmt.Println("d") return false, nil, nil } diff --git a/sql/planner/optimizer_test.go b/sql/planner/optimizer_test.go index 246c5261d..6c702195c 100644 --- a/sql/planner/optimizer_test.go +++ b/sql/planner/optimizer_test.go @@ -8,7 +8,8 @@ import ( "github.com/genjidb/genji/sql/parser" "github.com/genjidb/genji/sql/planner" "github.com/genjidb/genji/sql/query/expr" - "github.com/genjidb/genji/sql/scanner" + "github.com/genjidb/genji/stream" + st "github.com/genjidb/genji/stream" "github.com/stretchr/testify/require" ) @@ -20,49 +21,45 @@ func parsePath(t testing.TB, str string) document.Path { func TestSplitANDConditionRule(t *testing.T) { tests := []struct { - name string - root, expected planner.Node + name string + in, expected *st.Stream }{ { "no and", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.BoolValue(true)), - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.BoolValue(true)), + st.New(st.SeqScan("foo")).Pipe(st.Filter(expr.BoolValue(true))), + st.New(st.SeqScan("foo")).Pipe(st.Filter(expr.BoolValue(true))), }, { "and / top-level selection node", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), + st.New(st.SeqScan("foo")).Pipe(st.Filter( expr.And( expr.BoolValue(true), expr.BoolValue(false), ), - ), - planner.NewSelectionNode( - planner.NewSelectionNode( - planner.NewTableInputNode("foo"), - expr.BoolValue(false)), - expr.BoolValue(true)), + )), + st.New(st.SeqScan("foo")). + Pipe(st.Filter(expr.BoolValue(true))). + Pipe(st.Filter(expr.BoolValue(false))), }, { "and / middle-level selection node", - planner.NewLimitNode( - planner.NewSelectionNode(planner.NewTableInputNode("foo"), + st.New(st.SeqScan("foo")). + Pipe(st.Filter( expr.And( expr.BoolValue(true), expr.BoolValue(false), ), - ), 1), - planner.NewLimitNode( - planner.NewSelectionNode( - planner.NewSelectionNode( - planner.NewTableInputNode("foo"), - expr.BoolValue(false)), - expr.BoolValue(true), - ), 1), + )). + Pipe(st.Take(1)), + st.New(st.SeqScan("foo")). + Pipe(st.Filter(expr.BoolValue(true))). + Pipe(st.Filter(expr.BoolValue(false))). + Pipe(st.Take(1)), }, { "multi and", - planner.NewLimitNode( - planner.NewSelectionNode(planner.NewTableInputNode("foo"), + st.New(st.SeqScan("foo")). + Pipe(st.Filter( expr.And( expr.And( expr.IntegerValue(1), @@ -73,27 +70,22 @@ func TestSplitANDConditionRule(t *testing.T) { expr.IntegerValue(4), ), ), - ), 10), - planner.NewLimitNode( - planner.NewSelectionNode( - planner.NewSelectionNode( - planner.NewSelectionNode( - planner.NewSelectionNode( - planner.NewTableInputNode("foo"), - expr.IntegerValue(4)), - expr.IntegerValue(3)), - expr.IntegerValue(2)), - expr.IntegerValue(1)), - 10, - ), + )). + Pipe(st.Take(10)), + st.New(st.SeqScan("foo")). + Pipe(st.Filter(expr.IntegerValue(1))). + Pipe(st.Filter(expr.IntegerValue(2))). + Pipe(st.Filter(expr.IntegerValue(3))). + Pipe(st.Filter(expr.IntegerValue(4))). + Pipe(st.Take(10)), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - res, err := planner.SplitANDConditionRule(planner.NewTree(test.root)) + res, err := planner.SplitANDConditionRule(test.in, nil) require.NoError(t, err) - require.Equal(t, res.String(), planner.NewTree(test.expected).String()) + require.Equal(t, res.String(), test.expected.String()) }) } } @@ -123,6 +115,13 @@ func TestPrecalculateExprRule(t *testing.T) { expr.Gt(expr.Path{document.PathFragment{FieldName: "a"}}, expr.Sub(expr.IntegerValue(1), expr.DoubleValue(40))), expr.Gt(expr.Path{document.PathFragment{FieldName: "a"}}, expr.DoubleValue(-39)), }, + { + "constant sub-expr: a IN [1, 2] -> a IN array([1, 2])", + expr.In(expr.Path{document.PathFragment{FieldName: "a"}}, expr.LiteralExprList{expr.IntegerValue(1), expr.IntegerValue(2)}), + expr.In(expr.Path{document.PathFragment{FieldName: "a"}}, expr.LiteralValue(document.NewArrayValue(document.NewValueBuffer(). + Append(document.NewIntegerValue(1)). + Append(document.NewIntegerValue(2))))), + }, { "non-constant expr list: [a, 1 - 40] -> [a, -39]", expr.LiteralExprList{ @@ -170,9 +169,11 @@ func TestPrecalculateExprRule(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - res, err := planner.PrecalculateExprRule(planner.NewTree(planner.NewSelectionNode(planner.NewTableInputNode("foo"), test.e))) + s := stream.New(stream.SeqScan("foo")). + Pipe(stream.Filter(test.e)) + res, err := planner.PrecalculateExprRule(s, nil) require.NoError(t, err) - require.Equal(t, planner.NewTree(planner.NewSelectionNode(planner.NewTableInputNode("foo"), test.expected)).String(), res.String()) + require.Equal(t, stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(test.expected)).String(), res.String()) }) } } @@ -180,33 +181,33 @@ func TestPrecalculateExprRule(t *testing.T) { func TestRemoveUnnecessarySelectionNodesRule(t *testing.T) { tests := []struct { name string - root, expected planner.Node + root, expected *stream.Stream }{ { "non-constant expr", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.Path{document.PathFragment{FieldName: "a"}}), - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.Path{document.PathFragment{FieldName: "a"}}), + stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("a"))), + stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("a"))), }, { "truthy constant expr", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.IntegerValue(10)), - planner.NewTableInputNode("foo"), + stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("10"))), + stream.New(stream.SeqScan("foo")), }, { "falsy constant expr", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), expr.IntegerValue(0)), + stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("0"))), nil, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - res, err := planner.RemoveUnnecessarySelectionNodesRule(planner.NewTree(test.root)) + res, err := planner.RemoveUnnecessaryFilterNodesRule(test.root, nil) require.NoError(t, err) if test.expected != nil { - require.Equal(t, planner.NewTree(test.expected).String(), res.String()) + require.Equal(t, test.expected.String(), res.String()) } else { - require.Equal(t, test.expected, res.Root) + require.Equal(t, test.expected, res) } }) } @@ -215,80 +216,40 @@ func TestRemoveUnnecessarySelectionNodesRule(t *testing.T) { func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { tests := []struct { name string - root, expected planner.Node + root, expected *stream.Stream }{ { "non-unique key", - planner.NewDedupNode( - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "b"}}, - ExprName: "b", - }}, - "foo", - ), "foo"), - nil, + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("b"))). + Pipe(stream.Distinct()), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("b"))). + Pipe(stream.Distinct()), }, { "primary key", - planner.NewDedupNode( - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "a"}}, - ExprName: "a", - }}, - "foo", - ), "foo"), - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "a"}}, - ExprName: "a", - }}, - "foo", - ), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("a"))). + Pipe(stream.Distinct()), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("a"))), }, { "unique index", - planner.NewDedupNode( - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "c"}}, - ExprName: "c", - }}, - "foo", - ), "foo"), - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "c"}}, - ExprName: "c", - }}, - "foo", - ), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("c"))). + Pipe(stream.Distinct()), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("c"))), }, { "pk() function", - planner.NewDedupNode( - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.PKFunc{}, - ExprName: "pk()", - }}, - "foo", - ), "foo"), - planner.NewProjectionNode( - planner.NewTableInputNode("foo"), - []planner.ProjectedField{planner.ProjectedExpr{ - Expr: expr.PKFunc{}, - ExprName: "pk()", - }}, - "foo", - ), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("pk()"))). + Pipe(stream.Distinct()), + stream.New(stream.SeqScan("foo")). + Pipe(stream.Project(parser.MustParseExpr("pk()"))), }, } @@ -312,16 +273,9 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { `) require.NoError(t, err) - err = planner.Bind(planner.NewTree(test.root), tx.Transaction, nil) + res, err := planner.RemoveUnnecessaryDistinctNodeRule(test.root, tx.Transaction) require.NoError(t, err) - - res, err := planner.RemoveUnnecessaryDedupNodeRule(planner.NewTree(test.root)) - require.NoError(t, err) - if test.expected != nil { - require.Equal(t, planner.NewTree(test.expected).String(), res.String()) - } else { - require.Equal(t, test.root, res.Root) - } + require.Equal(t, test.expected.String(), res.String()) }) } } @@ -329,168 +283,67 @@ func TestRemoveUnnecessaryDedupNodeRule(t *testing.T) { func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { tests := []struct { name string - root, expected planner.Node + root, expected *stream.Stream }{ - { - "non-indexed path", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "d"}}, - expr.IntegerValue(1), - )), - nil, - }, - { - "FROM foo WHERE a = 1", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "a"}}, - expr.IntegerValue(1), - )), - planner.NewIndexInputNode( - "foo", - "idx_foo_a", - expr.Eq(nil, nil).(planner.IndexIteratorOperator), - expr.Path(parsePath(t, "a")), - expr.IntegerValue(1), - scanner.ASC, - ), - }, - { - "FROM foo WHERE a = 1 AND b = 2", - planner.NewSelectionNode( - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "a"}}, - expr.IntegerValue(1), - ), - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "b"}}, - expr.IntegerValue(2), - ), - ), - planner.NewSelectionNode( - planner.NewIndexInputNode( - "foo", - "idx_foo_b", - expr.Eq(nil, nil).(planner.IndexIteratorOperator), - expr.Path(parsePath(t, "b")), - expr.IntegerValue(2), - scanner.ASC, - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "a"}}, - expr.IntegerValue(1), - ), - ), - }, - { - "FROM foo WHERE c = 3 AND b = 2", - planner.NewSelectionNode( - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "c"}}, - expr.IntegerValue(3), - ), - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "b"}}, - expr.IntegerValue(2), - ), - ), - planner.NewSelectionNode( - planner.NewIndexInputNode( - "foo", - "idx_foo_c", - expr.Eq(nil, nil).(planner.IndexIteratorOperator), - expr.Path(parsePath(t, "c")), - expr.IntegerValue(3), - scanner.ASC, - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "b"}}, - expr.IntegerValue(2), - ), - ), - }, - { - "SELECT a FROM foo WHERE c = 3 AND b = 2", - planner.NewProjectionNode( - planner.NewSelectionNode( - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "c"}}, - expr.IntegerValue(3), - ), - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "b"}}, - expr.IntegerValue(2), - ), - ), - []planner.ProjectedField{ - planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "a"}}, - }, - }, - "foo", - ), - planner.NewProjectionNode( - planner.NewSelectionNode( - planner.NewIndexInputNode( - "foo", - "idx_foo_c", - expr.Eq(nil, nil).(planner.IndexIteratorOperator), - expr.Path(parsePath(t, "c")), - expr.IntegerValue(3), - scanner.ASC, - ), - expr.Eq( - expr.Path{document.PathFragment{FieldName: "b"}}, - expr.IntegerValue(2), - ), - ), - []planner.ProjectedField{ - planner.ProjectedExpr{ - Expr: expr.Path{document.PathFragment{FieldName: "a"}}, - }, - }, - "foo", - ), - }, + // { + // "non-indexed path", + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("d = 1"))), + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("d = 1"))), + // }, + // { + // "FROM foo WHERE a = 1", + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("a = 1"))), + // stream.New(stream.IndexScan("idx_foo_a", st.Range{Min: parser.MustParseExpr("1"), Exact: true})), + // }, + // { + // "FROM foo WHERE a = 1 AND b = 2", + // stream.New(stream.SeqScan("foo")). + // Pipe(stream.Filter(parser.MustParseExpr("a = 1"))). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))), + // stream.New(stream.IndexScan("idx_foo_b", st.Range{Min: parser.MustParseExpr("2"), Exact: true})). + // Pipe(stream.Filter(parser.MustParseExpr("a = 1"))), + // }, + // { + // "FROM foo WHERE c = 3 AND b = 2", + // stream.New(stream.SeqScan("foo")). + // Pipe(stream.Filter(parser.MustParseExpr("c = 3"))). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))), + // stream.New(stream.IndexScan("idx_foo_c", st.Range{Min: parser.MustParseExpr("3"), Exact: true})). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))), + // }, + // { + // "FROM foo WHERE c > 3 AND b = 2", + // stream.New(stream.SeqScan("foo")). + // Pipe(stream.Filter(parser.MustParseExpr("c > 3"))). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))), + // stream.New(stream.IndexScan("idx_foo_b", st.Range{Min: parser.MustParseExpr("2"), Exact: true})). + // Pipe(stream.Filter(parser.MustParseExpr("c > 3"))), + // }, + // { + // "SELECT a FROM foo WHERE c = 3 AND b = 2", + // stream.New(stream.SeqScan("foo")). + // Pipe(stream.Filter(parser.MustParseExpr("c = 3"))). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))). + // Pipe(stream.Project(parser.MustParseExpr("a"))), + // stream.New(stream.IndexScan("idx_foo_c", st.Range{Min: parser.MustParseExpr("3"), Exact: true})). + // Pipe(stream.Filter(parser.MustParseExpr("b = 2"))). + // Pipe(stream.Project(parser.MustParseExpr("a"))), + // }, { "FROM foo WHERE a IN [1, 2]", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.In( - expr.Path{document.PathFragment{FieldName: "a"}}, - expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), - ), - ), - planner.NewIndexInputNode( - "foo", - "idx_foo_a", - expr.In(nil, nil).(planner.IndexIteratorOperator), - expr.Path(parsePath(t, "a")), - expr.ArrayValue(document.NewValueBuffer(document.NewIntegerValue(1), document.NewIntegerValue(2))), - scanner.ASC, - ), - }, - { - "FROM foo WHERE 1 IN a", - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.In( - expr.IntegerValue(1), - expr.Path(parsePath(t, "a")), - ), - ), - planner.NewSelectionNode(planner.NewTableInputNode("foo"), - expr.In( - expr.IntegerValue(1), - expr.Path(parsePath(t, "a")), - ), - ), + stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("a IN [1, 2]"))), + stream.New(stream.IndexScan("idx_foo_a", st.Range{Min: parser.MustParseExpr("1"), Exact: true}, st.Range{Min: parser.MustParseExpr("2"), Exact: true})), }, + // { + // "FROM foo WHERE 1 IN a", + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("1 IN a"))), + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("1 IN a"))), + // }, + // { + // "FROM foo WHERE a > 10", + // stream.New(stream.SeqScan("foo")).Pipe(stream.Filter(parser.MustParseExpr("a > 10"))), + // stream.New(stream.IndexScan("idx_foo_a", st.Range{Min: parser.MustParseExpr("10"), Exclusive: true})), + // }, } for _, test := range tests { @@ -515,19 +368,9 @@ func TestUseIndexBasedOnSelectionNodeRule(t *testing.T) { `) require.NoError(t, err) - err = planner.Bind(planner.NewTree(test.root), tx.Transaction, []expr.Param{ - {Name: "p1", Value: 1}, - {Name: "p2", Value: 2}, - }) - require.NoError(t, err) - - res, err := planner.UseIndexBasedOnSelectionNodeRule(planner.NewTree(test.root)) + res, err := planner.UseIndexBasedOnFilterNodeRule(test.root, tx.Transaction) require.NoError(t, err) - if test.expected != nil { - require.Equal(t, planner.NewTree(test.expected).String(), res.String()) - } else { - require.Equal(t, res.Root, res.Root) - } + require.Equal(t, test.expected.String(), res.String()) }) } } diff --git a/sql/planner/tree.go b/sql/planner/tree.go deleted file mode 100644 index 7adc12702..000000000 --- a/sql/planner/tree.go +++ /dev/null @@ -1,485 +0,0 @@ -// Package planner provides types to describe and manage the lifecycle of a query. -// A query is represented as a tree, which itself represents a stream of documents. -// Each node of the tree is an operation that transforms that stream, following rules -// of relational algebra. -// Once a tree is created, it can be optimized by a list of rules. -package planner - -import ( - "fmt" - - "github.com/genjidb/genji/database" - "github.com/genjidb/genji/document" - "github.com/genjidb/genji/sql/query" - "github.com/genjidb/genji/sql/query/expr" -) - -// An Operation can manipulate and transform a stream of documents. -type Operation int - -const ( - // Input is a node from where data is read. It represents a stream of documents. - Input Operation = iota - // Selection (σ) is an operation that filters documents that satisfy a given condition. - Selection - // Projection (∏) is an operation that selects a list of fields from each document of a stream. - Projection - // Rename (ρ) is an operation that renames a path from each document of a stream. - Rename - // Deletion is an operation that removes all of the documents of a stream from their respective table. - Deletion - // Replacement is an operation that stores every document of a stream in their respective keys. - Replacement - // Limit is an operation that only allows a certain number of documents to be processed - // by the stream. - Limit - // Skip is an operation that ignores a certain number of documents. - Skip - // Sort is an operation that sorts a stream of document according to a given path and a direction. - Sort - // Set is an operation that adds a value or replaces at a given path for every document of the stream. - Set - // Unset is an operation that removes a value at a given path from every document of a stream - Unset - // Group is an operation that groups documents based on a given path. - Group - // Aggregation is an operation that creates one document per group of documents. - Aggregation - // Dedup is an operation that removes duplicate documents from a stream - Dedup -) - -// A Tree describes the flow of a stream of documents. -// Each node will manipulate the stream using relational algebra operations. -type Tree struct { - Root Node -} - -// NewTree creates a new tree with n as root. -func NewTree(n Node) *Tree { - return &Tree{Root: n} -} - -// Run implements the query.Statement interface. -// It binds the tree to the database resources and executes it. -func (t *Tree) Run(tx *database.Transaction, params []expr.Param) (query.Result, error) { - err := Bind(t, tx, params) - if err != nil { - return query.Result{}, err - } - - t, err = Optimize(t) - if err != nil { - return query.Result{}, err - } - - return t.execute() -} - -func (t *Tree) execute() (query.Result, error) { - if t.Root == nil { - return query.Result{}, nil - } - var st document.Stream - var err error - - if t.Root.Left() != nil { - st, err = nodeToStream(t.Root.Left()) - if err != nil { - return query.Result{}, err - } - } - - st, err = t.Root.(operationNode).toStream(st) - if err != nil { - return query.Result{}, err - } - - return query.Result{ - Stream: st, - }, nil -} - -func (t *Tree) String() string { - n := t.Root - - if n == nil { - return "" - } - - return nodeToString(t.Root) -} - -func nodeToString(n Node) string { - var s string - - if n.Left() != nil { - s = nodeToString(n.Left()) - } - - if s == "" { - return fmt.Sprintf("%v", n) - } - - return fmt.Sprintf("%s -> %v", s, n) -} - -// IsReadOnly implements the query.Statement interface. -func (t *Tree) IsReadOnly() bool { - return false -} - -func nodeToStream(n Node) (st document.Stream, err error) { - l := n.Left() - if l != nil { - st, err = nodeToStream(l) - if err != nil { - return - } - } - - switch t := n.(type) { - case inputNode: - st, err = t.buildStream() - case operationNode: - st, err = t.toStream(st) - default: - panic(fmt.Sprintf("incorrect node type %#v", n)) - } - - return -} - -// A Node represents an operation on the stream. -type Node interface { - Operation() Operation - Left() Node - Right() Node - SetLeft(Node) - SetRight(Node) - Bind(tx *database.Transaction, params []expr.Param) error -} - -type inputNode interface { - Node - - buildStream() (document.Stream, error) -} - -type operationNode interface { - Node - - toStream(st document.Stream) (document.Stream, error) -} - -type node struct { - op Operation - left, right Node -} - -func (n *node) Operation() Operation { - return n.op -} - -func (n *node) Left() Node { - return n.left -} - -func (n *node) Right() Node { - return n.right -} - -func (n *node) SetLeft(ln Node) { - n.left = ln -} - -func (n *node) SetRight(rn Node) { - n.right = rn -} - -type selectionNode struct { - node - - cond expr.Expr - tx *database.Transaction - params []expr.Param -} - -var _ operationNode = (*selectionNode)(nil) - -// NewSelectionNode creates a node that filters documents of a stream, according to -// the expression condition. -func NewSelectionNode(n Node, cond expr.Expr) Node { - return &selectionNode{ - node: node{ - op: Selection, - left: n, - }, - cond: cond, - } -} - -func (n *selectionNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { - n.tx = tx - n.params = params - return -} - -func (n *selectionNode) toStream(st document.Stream) (document.Stream, error) { - if n.cond == nil { - return st, nil - } - - env := expr.Environment{ - Params: n.params, - } - - return st.Filter(func(d document.Document) (bool, error) { - env.SetDocument(d) - v, err := n.cond.Eval(&env) - if err != nil { - return false, err - } - - ok, err := v.IsTruthy() - if err != nil { - return false, err - } - return ok, nil - }), nil -} - -func (n *selectionNode) String() string { - return fmt.Sprintf("σ(cond: %s)", n.cond) -} - -type limitNode struct { - node - - limit int - tx *database.Transaction - params []expr.Param -} - -var _ operationNode = (*limitNode)(nil) - -// NewLimitNode creates a node that limits the number of documents processed by the stream. -func NewLimitNode(n Node, limit int) Node { - return &limitNode{ - node: node{ - op: Limit, - left: n, - }, - limit: limit, - } -} - -func (n *limitNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { - n.tx = tx - n.params = params - return -} - -func (n *limitNode) toStream(st document.Stream) (document.Stream, error) { - return st.Limit(n.limit), nil -} - -func (n *limitNode) String() string { - return fmt.Sprintf("Limit(%d)", n.limit) -} - -type offsetNode struct { - node - offset int - - tx *database.Transaction - params []expr.Param -} - -var _ operationNode = (*offsetNode)(nil) - -// NewOffsetNode creates a node that skips a certain number of documents from the stream. -func NewOffsetNode(n Node, offset int) Node { - return &offsetNode{ - node: node{ - op: Limit, - left: n, - }, - offset: offset, - } -} - -func (n *offsetNode) String() string { - return fmt.Sprintf("Offset(%d)", n.offset) -} - -func (n *offsetNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { - n.tx = tx - n.params = params - return -} - -func (n *offsetNode) toStream(st document.Stream) (document.Stream, error) { - return st.Offset(n.offset), nil -} - -type setNode struct { - node - - path document.Path - e expr.Expr - - tx *database.Transaction - params []expr.Param -} - -var _ operationNode = (*setNode)(nil) - -// NewSetNode creates a node that adds or replaces a value at the given path for every document of the stream. -func NewSetNode(n Node, path document.Path, e expr.Expr) Node { - return &setNode{ - node: node{ - op: Set, - left: n, - }, - path: path, - e: e, - } -} - -func (n *setNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { - n.tx = tx - n.params = params - return -} - -func (n *setNode) String() string { - return fmt.Sprintf("Set(%s = %s)", n.path, n.e) -} - -func (n *setNode) toStream(st document.Stream) (document.Stream, error) { - var fb document.FieldBuffer - - env := expr.Environment{ - Params: n.params, - } - - return st.Map(func(d document.Document) (document.Document, error) { - env.SetDocument(d) - ev, err := n.e.Eval(&env) - if err != nil && err != document.ErrFieldNotFound { - return nil, err - } - - fb.Reset() - - err = fb.ScanDocument(d) - if err != nil { - return nil, err - } - - err = fb.Set(n.path, ev) - if err != nil { - return nil, err - } - - return &fb, nil - }), nil -} - -type unsetNode struct { - node - - field string -} - -var _ operationNode = (*unsetNode)(nil) - -// NewUnsetNode creates a node that removes a value at a given path for every document of the stream. -func NewUnsetNode(n Node, field string) Node { - return &unsetNode{ - node: node{ - op: Unset, - left: n, - }, - field: field, - } -} - -func (n *unsetNode) Bind(tx *database.Transaction, params []expr.Param) error { - return nil -} - -func (n *unsetNode) toStream(st document.Stream) (document.Stream, error) { - var fb document.FieldBuffer - - return st.Map(func(d document.Document) (document.Document, error) { - fb.Reset() - - _, err := d.GetByField(n.field) - if err != nil { - if err != document.ErrFieldNotFound { - return nil, err - } - - return d, nil - } - - err = fb.ScanDocument(d) - if err != nil { - return nil, err - } - - err = fb.Delete(document.Path{document.PathFragment{FieldName: n.field}}) - if err != nil { - return nil, err - } - - return &fb, nil - }), nil -} - -func (n *unsetNode) String() string { - return fmt.Sprintf("Unset(%s)", n.field) -} - -// A GroupingNode is a node that groups documents by value. -type GroupingNode struct { - node - - Tx *database.Transaction - Params []expr.Param - Expr expr.Expr -} - -var _ operationNode = (*GroupingNode)(nil) - -// NewGroupingNode creates a GroupingNode. -func NewGroupingNode(n Node, e expr.Expr) Node { - return &GroupingNode{ - node: node{ - op: Group, - left: n, - }, - Expr: e, - } -} - -// Bind database resources to this node. -func (n *GroupingNode) Bind(tx *database.Transaction, params []expr.Param) (err error) { - n.Tx = tx - n.Params = params - return -} - -// toStream uses the GroupBy stream operation. It evaluates Expr for every field and returns -// the result. -func (n *GroupingNode) toStream(st document.Stream) (document.Stream, error) { - return st.GroupBy(func(d document.Document) (document.Value, error) { - return n.Expr.Eval(expr.NewEnvironment(d, n.Params...)) - }), nil -} - -func (n *GroupingNode) String() string { - return fmt.Sprintf("Group(%s)", n.Expr) -} diff --git a/testutil/stream.go b/testutil/stream.go new file mode 100644 index 000000000..110b2e6a7 --- /dev/null +++ b/testutil/stream.go @@ -0,0 +1 @@ +package testutil