Skip to content

Commit

Permalink
feat: add dialect#normaliseidentifier and dialect#parserelationref
Browse files Browse the repository at this point in the history
  • Loading branch information
atzoum committed Jul 22, 2024
1 parent 180a706 commit ca468c0
Show file tree
Hide file tree
Showing 13 changed files with 355 additions and 0 deletions.
12 changes: 12 additions & 0 deletions sqlconnect/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,20 @@ type JsonRowMapper interface {
type Dialect interface {
// QuoteTable quotes a table name so that it can be embedded in a SQL statement.
QuoteTable(table RelationRef) string

// QuoteIdentifier quotes an identifier, e.g. a column name.
QuoteIdentifier(name string) string

// FormatTableName formats a table name, typically by lower or upper casing it, depending on the database.
//
// Deprecated: to be removed in future versions, since its behaviour is not consistent across databases, e.g. it lowercases names for BigQuery although it shouldn't.
// If you want consistent behaviour across databases, use [Dialect.NormaliseIdentifier] and [Dialect.ParseRelationRef] instead.
FormatTableName(name string) string

// NormaliseIdentifier normalises the parts of the identifier that are unquoted, typically by lower or upper casing them, depending on the database.
// Quoted parts, including their quotes, are returned unchanged.
NormaliseIdentifier(identifier string) string

// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [Dialect.QuoteTable]) and, for instance, used for matching against the database's information schema.
ParseRelationRef(identifier string) (RelationRef, error)
}
73 changes: 73 additions & 0 deletions sqlconnect/internal/base/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,76 @@ func (d dialect) QuoteIdentifier(name string) string {
func (d dialect) FormatTableName(name string) string {
// Lowercases the whole name unconditionally; quoted parts get no special treatment here.
return strings.ToLower(name)
}

// NormaliseIdentifier normalises the identifier parts that are unquoted by lowercasing them.
// Parts enclosed in double quotes (") are returned unchanged, quotes included.
func (d dialect) NormaliseIdentifier(identifier string) string {
return NormaliseIdentifier(identifier, '"', strings.ToLower)
}

// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes.
// Unquoted parts are lowercased and the double quote (") is treated as the quote character.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and, for instance, used for matching against the database's information schema.
// An error is returned when the identifier consists of more than three dot-separated parts.
func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
return ParseRelationRef(identifier, '"', strings.ToLower)
}

// ParseRelationRef converts identifier into a RelationRef.
//
// The identifier is first normalised with quotes stripped: unquoted runes go
// through normF, quoted content is kept verbatim and the delimiting quote runes
// are removed. The outcome is then split on "." and mapped, right to left, onto
// Name, Schema and Catalog.
//
// An error is returned when the split yields more than three parts.
//
// NOTE(review): the split happens after the quotes are removed, so a dot inside a
// quoted part also acts as a separator — confirm that this is intended for every
// dialect that uses this helper.
func ParseRelationRef(identifier string, quote rune, normF func(string) string) (sqlconnect.RelationRef, error) {
	stripped := doNormaliseIdentifier(identifier, quote, normF, true)
	segments := strings.Split(stripped, ".")

	count := len(segments)
	if count < 1 || count > 3 {
		return sqlconnect.RelationRef{}, fmt.Errorf("invalid relation reference: %s", identifier)
	}

	// Fill the fields from the rightmost segment backwards: name, then schema, then catalog.
	var ref sqlconnect.RelationRef
	last := count - 1
	ref.Name = segments[last]
	if last >= 1 {
		ref.Schema = segments[last-1]
	}
	if last >= 2 {
		ref.Catalog = segments[last-2]
	}
	return ref, nil
}

// NormaliseIdentifier applies normF to the parts of identifier that are not enclosed in quote
// and returns the result with all quote characters preserved.
// It delegates to doNormaliseIdentifier with quote stripping disabled.
func NormaliseIdentifier(identifier string, quote rune, normF func(string) string) string {
return doNormaliseIdentifier(identifier, quote, normF, false)
}

// doNormaliseIdentifier walks identifier rune by rune, applies normF to every rune
// that lies outside a quoted section and copies quoted content verbatim.
//
// A quoted section starts at an occurrence of quote outside a quoted section and
// ends at the next quote that is not immediately followed by another quote. Two
// consecutive quotes inside a quoted section form an escaped, literal quote.
//
// When stripQuotes is false every quote rune is copied to the output, so the
// quoting of the input is preserved. When stripQuotes is true the delimiting
// quotes are dropped and each escaped pair is collapsed into a single quote rune.
//
// normF is called once per unquoted rune with a single-rune string. Unterminated
// quoted sections are not reported: everything after the opening quote is simply
// treated as quoted.
func doNormaliseIdentifier(identifier string, quote rune, normF func(string) string, stripQuotes bool) string {
	// Decode once so that the look-ahead below works on rune positions. Indexing a
	// rune slice with the byte offsets produced by ranging over the string selects
	// the wrong rune, or panics with an out-of-range index, as soon as the
	// identifier contains multi-byte characters.
	runes := []rune(identifier)

	var result strings.Builder
	result.Grow(len(identifier))

	var inQuotedIdentifier, inEscapedQuote bool
	for i, c := range runes {
		if c != quote {
			if inQuotedIdentifier {
				result.WriteRune(c)
			} else {
				result.WriteString(normF(string(c)))
			}
			continue
		}

		// Quote runes are always emitted when quotes are kept.
		if !stripQuotes {
			result.WriteRune(c)
		}
		switch {
		case !inQuotedIdentifier:
			// Opening quote.
			inQuotedIdentifier = true
		case inEscapedQuote:
			// Second quote of an escaped pair: when stripping, this is the one
			// literal quote that survives.
			inEscapedQuote = false
			if stripQuotes {
				result.WriteRune(c)
			}
		case i+1 < len(runes) && runes[i+1] == quote:
			// First quote of an escaped pair.
			inEscapedQuote = true
		default:
			// Closing quote.
			inQuotedIdentifier = false
		}
	}
	return result.String()
}
39 changes: 39 additions & 0 deletions sqlconnect/internal/base/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) {
quoted = d.QuoteTable(sqlconnect.NewSchemaTableRef("schema", "table"))
require.Equal(t, `"schema"."table"`, quoted, "schema and table name should be quoted with double quotes")
})

t.Run("normalise identifier", func(t *testing.T) {
normalised := d.NormaliseIdentifier("column")
require.Equal(t, "column", normalised, "column name should be normalised to lowercase")

normalised = d.NormaliseIdentifier("COLUMN")
require.Equal(t, "column", normalised, "column name should be normalised to lowercase")

normalised = d.NormaliseIdentifier(`"ColUmn"`)
require.Equal(t, `"ColUmn"`, normalised, "quoted column name should not be normalised")

normalised = d.NormaliseIdentifier(`TaBle."ColUmn"`)
require.Equal(t, `table."ColUmn"`, normalised, "non quoted parts should be normalised")

normalised = d.NormaliseIdentifier(`"Sh""EmA".TABLE."ColUmn"`)
require.Equal(t, `"Sh""EmA".table."ColUmn"`, normalised, "non quoted parts should be normalised")
})

t.Run("parse relation", func(t *testing.T) {
parsed, err := d.ParseRelationRef("table")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef("TABLE")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef(`"TaBle"`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: `TaBle`}, parsed)

parsed, err = d.ParseRelationRef(`ScHeMA."TaBle"`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef(`"CaTa""LoG".ScHeMA."TaBle"`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa\"LoG", Schema: "schema", Name: "TaBle"}, parsed)
})
}
14 changes: 14 additions & 0 deletions sqlconnect/internal/bigquery/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"strings"

"github.com/rudderlabs/sqlconnect-go/sqlconnect"
"github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base"
)

type dialect struct{}
Expand All @@ -25,3 +26,16 @@ func (d dialect) QuoteIdentifier(name string) string {
func (d dialect) FormatTableName(name string) string {
// Lowercases the whole name, unlike NormaliseIdentifier in this file, which leaves unquoted parts unchanged.
return strings.ToLower(name)
}

// identityFn returns its input unchanged; it is passed as the normalisation function so that unquoted identifier parts keep their original casing.
var identityFn = func(s string) string { return s }

// NormaliseIdentifier returns the identifier as is: unquoted parts are passed through identityFn, i.e. no case folding is applied,
// and parts enclosed in backticks (`) are kept verbatim, backticks included.
func (d dialect) NormaliseIdentifier(identifier string) string {
return base.NormaliseIdentifier(identifier, '`', identityFn)
}

// ParseRelationRef parses a string into a RelationRef after stripping out surrounding backticks (`).
// Unquoted parts keep their original casing, since identityFn is used as the normalisation function.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and, for instance, used for matching against the database's information schema.
func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
return base.ParseRelationRef(identifier, '`', identityFn)
}
39 changes: 39 additions & 0 deletions sqlconnect/internal/bigquery/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) {
quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema")))
require.Equal(t, "`schema.table`", quoted, "schema and table name should be quoted with backticks")
})

t.Run("normalise identifier", func(t *testing.T) {
normalised := d.NormaliseIdentifier("column")
require.Equal(t, "column", normalised, "column name should be normalised")

normalised = d.NormaliseIdentifier("COLUMN")
require.Equal(t, "COLUMN", normalised, "column name should be normalised")

normalised = d.NormaliseIdentifier("`ColUmn`")
require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised")

normalised = d.NormaliseIdentifier("TaBle.`ColUmn`")
require.Equal(t, "TaBle.`ColUmn`", normalised, "non quoted parts should be normalised")

normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`")
require.Equal(t, "`Sh``EmA`.TABLE.`ColUmn`", normalised, "non quoted parts should be normalised")
})

t.Run("parse relation", func(t *testing.T) {
parsed, err := d.ParseRelationRef(`table`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef("TABLE")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "TABLE"}, parsed)

parsed, err = d.ParseRelationRef("`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Schema: "ScHeMA", Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "ScHeMA", Name: "TaBle"}, parsed)
})
}
12 changes: 12 additions & 0 deletions sqlconnect/internal/databricks/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"strings"

"github.com/rudderlabs/sqlconnect-go/sqlconnect"
"github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base"
)

type dialect struct{}
Expand All @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string {
func (d dialect) FormatTableName(name string) string {
// Lowercases the whole name unconditionally; quoted parts get no special treatment here.
return strings.ToLower(name)
}

// NormaliseIdentifier normalises the identifier parts that are unquoted by lowercasing them.
// Parts enclosed in backticks (`) are returned unchanged, backticks included.
func (d dialect) NormaliseIdentifier(identifier string) string {
return base.NormaliseIdentifier(identifier, '`', strings.ToLower)
}

// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding backticks (`).
// Unquoted parts are lowercased; quoted parts keep their casing.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and, for instance, used for matching against the database's information schema.
func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
return base.ParseRelationRef(identifier, '`', strings.ToLower)
}
39 changes: 39 additions & 0 deletions sqlconnect/internal/databricks/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) {
quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema")))
require.Equal(t, "`schema`.`table`", quoted, "schema and table name should be quoted with backticks")
})

t.Run("normalise identifier", func(t *testing.T) {
normalised := d.NormaliseIdentifier("column")
require.Equal(t, "column", normalised, "column name should be normalised to lowercase")

normalised = d.NormaliseIdentifier("COLUMN")
require.Equal(t, "column", normalised, "column name should be normalised to lowercase")

normalised = d.NormaliseIdentifier("`ColUmn`")
require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised")

normalised = d.NormaliseIdentifier("TaBle.`ColUmn`")
require.Equal(t, "table.`ColUmn`", normalised, "non quoted parts should be normalised")

normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`")
require.Equal(t, "`Sh``EmA`.table.`ColUmn`", normalised, "non quoted parts should be normalised")
})

t.Run("parse relation", func(t *testing.T) {
parsed, err := d.ParseRelationRef(`table`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef("TABLE")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef("`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "schema", Name: "TaBle"}, parsed)
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ func TestDatabaseScenarios(t *testing.T, warehouse string, configJSON json.RawMe
})
})

t.Run("dialect", func(t *testing.T) {
// Create an unquoted table
unquotedTable := "UnQuoted_TablE"
identifier := db.QuoteIdentifier(schema.Name) + "." + unquotedTable
_, err := db.Exec("CREATE TABLE " + identifier + " (c1 int)")
require.NoError(t, err, "it should be able to create an unquoted table")

table, err := db.ParseRelationRef(identifier)
require.NoError(t, err, "it should be able to parse an unquoted table")
exists, err := db.TableExists(ctx, table)
require.NoError(t, err, "it should be able to check if a table exists")
require.True(t, exists, "it should return true for a table that exists")
})

t.Run("table admin", func(t *testing.T) {
table := sqlconnect.NewRelationRef(formatfn("test_table"), sqlconnect.WithSchema(schema.Name))

Expand Down
14 changes: 14 additions & 0 deletions sqlconnect/internal/mysql/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"strings"

"github.com/rudderlabs/sqlconnect-go/sqlconnect"
"github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base"
)

type dialect struct{}
Expand All @@ -25,3 +26,16 @@ func (d dialect) QuoteIdentifier(name string) string {
func (d dialect) FormatTableName(name string) string {
// Lowercases the whole name, unlike NormaliseIdentifier in this file, which leaves unquoted parts unchanged.
return strings.ToLower(name)
}

// identityFn returns its input unchanged; it is passed as the normalisation function so that unquoted identifier parts keep their original casing.
var identityFn = func(s string) string { return s }

// NormaliseIdentifier returns the identifier as is: unquoted parts are passed through identityFn, i.e. no case folding is applied,
// and parts enclosed in backticks (`) are kept verbatim, backticks included.
func (d dialect) NormaliseIdentifier(identifier string) string {
return base.NormaliseIdentifier(identifier, '`', identityFn)
}

// ParseRelationRef parses a string into a RelationRef after stripping out surrounding backticks (`).
// Unquoted parts keep their original casing, since identityFn is used as the normalisation function.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and, for instance, used for matching against the database's information schema.
func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
return base.ParseRelationRef(identifier, '`', identityFn)
}
39 changes: 39 additions & 0 deletions sqlconnect/internal/mysql/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) {
quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema")))
require.Equal(t, "`schema`.`table`", quoted, "schema and table name should be quoted with backticks")
})

t.Run("normalise identifier", func(t *testing.T) {
normalised := d.NormaliseIdentifier("column")
require.Equal(t, "column", normalised, "column name should be normalised")

normalised = d.NormaliseIdentifier("COLUMN")
require.Equal(t, "COLUMN", normalised, "column name should be normalised")

normalised = d.NormaliseIdentifier("`ColUmn`")
require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised")

normalised = d.NormaliseIdentifier("TaBle.`ColUmn`")
require.Equal(t, "TaBle.`ColUmn`", normalised, "non quoted parts should be normalised")

normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`")
require.Equal(t, "`Sh``EmA`.TABLE.`ColUmn`", normalised, "non quoted parts should be normalised")
})

t.Run("parse relation", func(t *testing.T) {
parsed, err := d.ParseRelationRef(`table`)
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed)

parsed, err = d.ParseRelationRef("TABLE")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "TABLE"}, parsed)

parsed, err = d.ParseRelationRef("`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Schema: "ScHeMA", Name: "TaBle"}, parsed)

parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`")
require.NoError(t, err)
require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "ScHeMA", Name: "TaBle"}, parsed)
})
}
12 changes: 12 additions & 0 deletions sqlconnect/internal/snowflake/dialect.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"strings"

"github.com/rudderlabs/sqlconnect-go/sqlconnect"
"github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base"
)

type dialect struct{}
Expand All @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string {
func (d dialect) FormatTableName(name string) string {
// Uppercases the whole name unconditionally; quoted parts get no special treatment here.
return strings.ToUpper(name)
}

// NormaliseIdentifier normalises the identifier parts that are unquoted by uppercasing them.
// Parts enclosed in double quotes (") are returned unchanged, quotes included.
func (d dialect) NormaliseIdentifier(identifier string) string {
return base.NormaliseIdentifier(identifier, '"', strings.ToUpper)
}

// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding double quotes (").
// Unquoted parts are uppercased; quoted parts keep their casing.
// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and, for instance, used for matching against the database's information schema.
func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
return base.ParseRelationRef(identifier, '"', strings.ToUpper)
}
Loading

0 comments on commit ca468c0

Please sign in to comment.