-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
892 additions
and
333 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package costmodel | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"github.com/kwilteam/kwil-db/internal/engine/cost/datatypes" | ||
"github.com/kwilteam/kwil-db/internal/engine/cost/logical_plan" | ||
) | ||
|
||
// Disk access cost units used by the cost model.
const (
	// SeqAccessCostPerRow is the sequential access disk cost, charged per row.
	SeqAccessCostPerRow = 1
	// RandAccessCost is the random access disk cost, i.e., an index scan.
	RandAccessCost = 3
)
|
||
// RelExpr is a wrapper of a logical plan,it's used for cost estimation. | ||
// It tracks the statistics and cost from bottom to top. | ||
// NOTE: this is simplified version of LogicalRel in memo package. | ||
type RelExpr struct { | ||
logical_plan.LogicalPlan | ||
|
||
stat *datatypes.Statistics // current node's statistics | ||
cost int64 | ||
inputs []*RelExpr | ||
} | ||
|
||
func (r *RelExpr) Inputs() []*RelExpr { | ||
return r.inputs | ||
} | ||
|
||
func (r *RelExpr) String() string { | ||
return fmt.Sprintf("%s, Stat: (%s), Cost: %d", | ||
logical_plan.PlanString(r.LogicalPlan), r.stat, r.cost) | ||
} | ||
|
||
// reorderColStat reorders the columns in the statistics according to the schema. | ||
// Schema can be changed by the projection/join, so we need to reorder the columns in | ||
// the statistics. | ||
func reorderColStat(oldStat *datatypes.Statistics, schema *datatypes.Schema) *datatypes.Statistics { | ||
|
||
} | ||
|
||
// BuildRelExpr builds a RelExpr from a logical plan, also build the statistics. | ||
// TODO: using iterator to traverse the plan tree. | ||
func BuildRelExpr(plan logical_plan.LogicalPlan) *RelExpr { | ||
inputs := make([]*RelExpr, len(plan.Inputs())) | ||
for i, input := range plan.Inputs() { | ||
inputs[i] = BuildRelExpr(input) | ||
} | ||
|
||
var stat *datatypes.Statistics | ||
|
||
switch p := plan.(type) { | ||
case *logical_plan.ScanOp: | ||
stat = p.DataSource().Statistics() | ||
|
||
case *logical_plan.ProjectionOp: | ||
stat = inputs[0].stat | ||
|
||
case *logical_plan.FilterOp: | ||
stat = inputs[0].stat | ||
// with filter, we can make uniformity assumption to simplify the cost model | ||
exprs := p.Exprs() | ||
fields := make([]datatypes.Field, len(exprs)) | ||
for i, expr := range exprs { | ||
fields[i] = expr.Resolve() | ||
} | ||
|
||
default: | ||
stat = datatypes.NewEmptyStatistics() | ||
} | ||
|
||
return &RelExpr{ | ||
LogicalPlan: plan, | ||
cost: 0, | ||
inputs: inputs, | ||
stat: stat, | ||
} | ||
} | ||
|
||
func Format(plan *RelExpr, indent int) string { | ||
var msg bytes.Buffer | ||
for i := 0; i < indent; i++ { | ||
msg.WriteString(" ") | ||
} | ||
msg.WriteString(plan.String()) | ||
msg.WriteString("\n") | ||
for _, child := range plan.Inputs() { | ||
msg.WriteString(Format(child, indent+2)) | ||
} | ||
return msg.String() | ||
} | ||
|
||
//func EstimateCost(plan *RelExpr) int64 { | ||
// cost := int64(0) | ||
// // bottom-up | ||
// for _, child := range plan.Inputs() { | ||
// cost += EstimateCost(child) | ||
// } | ||
// | ||
// // estimate current node's cost | ||
// switch plan.LogicalPlan.(type) { | ||
// case *logical_plan.ScanOp: | ||
// // TODO: index scan | ||
// cost += SeqAccessCost | ||
// } | ||
// return cost | ||
//} | ||
// | ||
|
||
//// EstimateCost estimates the cost of a logical plan. | ||
//// It uses iterator to traverse the plan tree. | ||
//func EstimateCost(plan *RelExpr) int64 { | ||
// stack := []*RelExpr{plan} | ||
// cost := int64(0) | ||
// | ||
// for len(stack) > 0 { | ||
// // Pop a node from the stack | ||
// n := len(stack) - 1 | ||
// node := stack[n] | ||
// stack = stack[:n] | ||
// | ||
// // Estimate current node's cost | ||
// switch p := node.LogicalPlan.(type) { | ||
// case *logical_plan.ScanOp: | ||
// // TODO: index scan | ||
// cost += p. | ||
// } | ||
// | ||
// // Push all children onto the stack | ||
// for _, child := range node.Inputs() { | ||
// stack = append(stack, child) | ||
// } | ||
// } | ||
// | ||
// return cost | ||
//} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
package costmodel | ||
|
||
import ( | ||
"github.com/kwilteam/kwil-db/internal/engine/cost/internal/testkit" | ||
"github.com/kwilteam/kwil-db/internal/engine/cost/query_planner" | ||
sqlparser "github.com/kwilteam/kwil-db/parse/sql" | ||
"github.com/stretchr/testify/assert" | ||
"testing" | ||
) | ||
|
||
func Test_RelExpr_String(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
r *RelExpr | ||
want string | ||
}{ | ||
{ | ||
name: "test", | ||
r: &RelExpr{}, | ||
want: "test\n\n stat: &{0 []}\n cost: 0", | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
if got := tt.r.String(); got != tt.want { | ||
t.Errorf("RelExpr.String() = %v, want %v", got, tt.want) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func Test_NewRelExpr(t *testing.T) { | ||
cat := testkit.InitMockCatalog() | ||
|
||
tests := []struct { | ||
name string | ||
sql string | ||
wt string // want | ||
}{ | ||
/////////////////////// no relation | ||
{ | ||
name: "select int", | ||
sql: "SELECT 1", | ||
wt: "Projection: 1, Stat: (RowCount: 0), Cost: 0\n" + | ||
" NoRelationOp, Stat: (RowCount: 0), Cost: 0\n", | ||
}, | ||
{ | ||
name: "select string", | ||
sql: "SELECT 'hello'", | ||
wt: "Projection: 'hello', Stat: (RowCount: 0), Cost: 0\n" + | ||
" NoRelationOp, Stat: (RowCount: 0), Cost: 0\n", | ||
}, | ||
{ | ||
name: "select value expression", | ||
sql: "SELECT 1+2", | ||
wt: "Projection: 1 + 2, Stat: (RowCount: 0), Cost: 0\n" + | ||
" NoRelationOp, Stat: (RowCount: 0), Cost: 0\n", | ||
}, | ||
// TODO: add function metadata to catalog | ||
// TODO: add support for functions in logical expr | ||
//{ | ||
// name: "select function abs", | ||
// sql: "SELECT ABS(-1)", | ||
// wt: "", | ||
//}, | ||
/////////////////////// one relation | ||
{ | ||
name: "select wildcard", | ||
sql: "SELECT * FROM users", | ||
wt: "Projection: users.id, users.username, users.age, users.state, users.wallet, Stat: (RowCount: 0), Cost: 0\n" + | ||
" Scan: users, Stat: (RowCount: 5), Cost: 0\n", | ||
}, | ||
//{ // TODO? | ||
// name: "select wildcard, deduplication", | ||
// sql: "SELECT *, age FROM users", | ||
// wt: "Projection: users.id, users.username, users.age, users.state, users.wallet\n" + | ||
// " Scan: users; projection=[]\n", | ||
//}, | ||
{ | ||
name: "select columns", | ||
sql: "select username, age from users", | ||
wt: "Projection: users.username, users.age, Stat: (RowCount: 0), Cost: 0\n" + | ||
" Scan: users, Stat: (RowCount: 5), Cost: 0\n", | ||
}, | ||
{ | ||
name: "select column with alias", | ||
sql: "select username as name from users", | ||
wt: "Projection: users.username AS name\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select column expression", | ||
sql: "select username, age+10 from users", | ||
wt: "Projection: users.username, users.age + 10\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select with where", | ||
sql: "select username, age from users where age > 20", | ||
wt: "Projection: users.username, users.age\n" + | ||
" Filter: users.age > 20\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select with multiple where", | ||
sql: "select username, age from users where age > 20 and state = 'CA'", | ||
wt: "Projection: users.username, users.age\n" + | ||
" Filter: users.age > 20 AND users.state = 'CA'\n" + | ||
" Scan: users\n", | ||
}, | ||
//{ | ||
// name: "select with group by", | ||
// sql: "select username, count(*) from users group by username", | ||
// wt: "GroupBy: users.username\n", | ||
//}, | ||
{ | ||
name: "select with limit, without offset", | ||
sql: "select username, age from users limit 10", | ||
wt: "Limit: skip=0, fetch=10\n" + | ||
" Projection: users.username, users.age\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select with limit and offset", | ||
sql: "select username, age from users limit 10 offset 5", | ||
wt: "Limit: skip=5, fetch=10\n" + | ||
" Projection: users.username, users.age\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select with order by default", | ||
sql: "select username, age from users order by age", | ||
wt: "Sort: age ASC NULLS LAST\n" + | ||
" Projection: users.username, users.age\n" + | ||
" Scan: users\n", | ||
}, | ||
{ | ||
name: "select with order by desc", | ||
sql: "select username, age from users order by age desc", | ||
wt: "Sort: age DESC NULLS FIRST\n" + | ||
" Projection: users.username, users.age\n" + | ||
" Scan: users\n", | ||
}, | ||
/////////////////////// subquery | ||
{ | ||
name: "select with subquery", | ||
sql: "select username, age from (select * from users) as u", | ||
wt: "Projection: users.username, users.age\n" + | ||
" Projection: users.id, users.username, users.age, users.state, users.wallet\n" + | ||
" Scan: users\n", | ||
}, | ||
/////////////////////// two relations | ||
|
||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
stmt, err := sqlparser.Parse(tt.sql) | ||
assert.NoError(t, err) | ||
|
||
q := query_planner.NewPlanner(cat) | ||
plan := q.ToPlan(stmt) | ||
rel := BuildRelExpr(plan) | ||
assert.Equal(t, tt.wt, Format(rel, 0)) | ||
}) | ||
} | ||
} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.