diff --git a/sqlconnect/db.go b/sqlconnect/db.go
index aac2ddf..9126585 100644
--- a/sqlconnect/db.go
+++ b/sqlconnect/db.go
@@ -101,8 +101,17 @@ type JsonRowMapper interface {
 type Dialect interface {
 	// QuoteTable quotes a table name
 	QuoteTable(table RelationRef) string
+
 	// QuoteIdentifier quotes an identifier, e.g. a column name
 	QuoteIdentifier(name string) string
+
 	// FormatTableName formats a table name, typically by lower or upper casing it, depending on the database
 	FormatTableName(name string) string
+
+	// NormaliseIdentifier normalises the identifier's parts that are unquoted, typically by lower or upper casing them, depending on the database
+	NormaliseIdentifier(identifier string) string
+
+	// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes.
+	// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields.
+	ParseRelationRef(identifier string) (RelationRef, error)
 }
diff --git a/sqlconnect/internal/base/dialect.go b/sqlconnect/internal/base/dialect.go
index eaf3468..2ad0044 100644
--- a/sqlconnect/internal/base/dialect.go
+++ b/sqlconnect/internal/base/dialect.go
@@ -26,3 +26,85 @@ func (d dialect) QuoteIdentifier(name string) string {
 func (d dialect) FormatTableName(name string) string {
 	return strings.ToLower(name)
 }
+
+// NormaliseIdentifier normalises identifier parts that are unquoted, typically by lower or upper casing them, depending on the database
+func (d dialect) NormaliseIdentifier(identifier string) string {
+	return NormaliseIdentifier(identifier, '"', strings.ToLower)
+}
+
+// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes.
+// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields.
+func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) {
+	return ParseRelationRef(identifier, '"', strings.ToLower)
+}
+
+// ParseRelationRef normalises identifier with normF, strips its quote characters and splits the result on "."
+// into a RelationRef: one part is the name, two parts are schema and name, three parts are catalog, schema and name.
+// Any other number of parts results in an error.
+//
+// NOTE: the split is performed after the quotes have been stripped, so a "." inside a quoted part acts as a separator too.
+func ParseRelationRef(identifier string, quote rune, normF func(string) string) (sqlconnect.RelationRef, error) {
+	normalised := doNormaliseIdentifier(identifier, quote, normF, true)
+	parts := strings.Split(normalised, ".")
+	switch len(parts) {
+	case 1:
+		return sqlconnect.RelationRef{Name: parts[0]}, nil
+	case 2:
+		return sqlconnect.RelationRef{Schema: parts[0], Name: parts[1]}, nil
+	case 3:
+		return sqlconnect.RelationRef{Catalog: parts[0], Schema: parts[1], Name: parts[2]}, nil
+	default:
+		return sqlconnect.RelationRef{}, fmt.Errorf("invalid relation reference: %s", identifier)
+	}
+}
+
+// NormaliseIdentifier applies normF to every rune of identifier that lies outside a quoted section;
+// quote runes and the content of quoted sections are copied unchanged.
+func NormaliseIdentifier(identifier string, quote rune, normF func(string) string) string {
+	return doNormaliseIdentifier(identifier, quote, normF, false)
+}
+
+// doNormaliseIdentifier walks identifier rune by rune, tracking whether the current rune lies inside a section
+// delimited by quote. Runes outside quoted sections are passed through normF one at a time, runes inside are
+// copied verbatim. Inside a quoted section, a quote immediately followed by another quote is an escaped quote.
+// With stripQuotes unset every quote rune is copied to the output; with stripQuotes set only the second quote
+// of each escaped pair is copied, i.e. delimiting quotes are dropped and escaped quotes are unescaped.
+func doNormaliseIdentifier(identifier string, quote rune, normF func(string) string, stripQuotes bool) string {
+	// Index the input by rune rather than by byte, so that the look-ahead below reads the rune that actually
+	// follows the quote (and stays in bounds) when the identifier contains multi-byte characters.
+	runes := []rune(identifier)
+	var result strings.Builder
+	var inQuotedIdentifier bool
+	var inEscapedQuote bool
+	for i, c := range runes {
+		if c != quote {
+			if inQuotedIdentifier {
+				result.WriteRune(c)
+			} else {
+				result.WriteString(normF(string(c)))
+			}
+			continue
+		}
+		if !stripQuotes {
+			result.WriteRune(c)
+		}
+		switch {
+		case !inQuotedIdentifier:
+			// opening quote
+			inQuotedIdentifier = true
+		case inEscapedQuote:
+			// second quote of an escaped pair: this is the one that survives stripping
+			inEscapedQuote = false
+			if stripQuotes {
+				result.WriteRune(c)
+			}
+		case i+1 < len(runes) && runes[i+1] == quote:
+			// first quote of an escaped pair
+			inEscapedQuote = true
+		case i+1 < len(runes):
+			// closing quote; a quote that ends the input leaves the state untouched
+			inQuotedIdentifier = false
+		}
+	}
+	return result.String()
+}
diff --git a/sqlconnect/internal/base/dialect_test.go b/sqlconnect/internal/base/dialect_test.go index 28f5205..9ea5670 100644 --- a/sqlconnect/internal/base/dialect_test.go +++ 
b/sqlconnect/internal/base/dialect_test.go @@ -27,4 +27,44 @@ func TestDialect(t *testing.T) { quoted = d.QuoteTable(sqlconnect.NewSchemaTableRef("schema", "table")) require.Equal(t, `"schema"."table"`, quoted, "schema and table name should be quoted with double quotes") }) + + t.Run("normalise identifier", func(t *testing.T) { + normalised := d.NormaliseIdentifier("column") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("COLUMN") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier(`"ColUmn"`) + require.Equal(t, `"ColUmn"`, normalised, "quoted column name should not be normalised") + + normalised = d.NormaliseIdentifier(`TaBle."ColUmn"`) + require.Equal(t, `table."ColUmn"`, normalised, "non quoted parts should be normalised") + + normalised = d.NormaliseIdentifier(`"Sh""EmA".TABLE."ColUmn"`) + require.Equal(t, `"Sh""EmA".table."ColUmn"`, normalised, "non quoted parts should be normalised") + + }) + + t.Run("parse relation", func(t *testing.T) { + parsed, err := d.ParseRelationRef("table") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("TABLE") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef(`"TaBle"`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: `TaBle`}, parsed) + + parsed, err = d.ParseRelationRef(`ScHeMA."TaBle"`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef(`"CaTa""LoG".ScHeMA."TaBle"`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa\"LoG", Schema: "schema", Name: "TaBle"}, parsed) + }) } diff --git a/sqlconnect/internal/bigquery/dialect.go b/sqlconnect/internal/bigquery/dialect.go 
index 71c9fde..ff65f72 100644 --- a/sqlconnect/internal/bigquery/dialect.go +++ b/sqlconnect/internal/bigquery/dialect.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/rudderlabs/sqlconnect-go/sqlconnect" + "github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base" ) type dialect struct{} @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string { func (d dialect) FormatTableName(name string) string { return strings.ToLower(name) } + +// NormaliseIdentifier normalises identifier parts that are unquoted, typically by lower or upper casing them, depending on the database +func (d dialect) NormaliseIdentifier(identifier string) string { + return base.NormaliseIdentifier(identifier, '`', strings.ToLower) +} + +// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes. +// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields. 
+func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) { + return base.ParseRelationRef(identifier, '`', strings.ToLower) +} diff --git a/sqlconnect/internal/bigquery/dialect_test.go b/sqlconnect/internal/bigquery/dialect_test.go index 3e245e9..2db37a6 100644 --- a/sqlconnect/internal/bigquery/dialect_test.go +++ b/sqlconnect/internal/bigquery/dialect_test.go @@ -27,4 +27,44 @@ func TestDialect(t *testing.T) { quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema"))) require.Equal(t, "`schema.table`", quoted, "schema and table name should be quoted with backticks") }) + + t.Run("normalise identifier", func(t *testing.T) { + normalised := d.NormaliseIdentifier("column") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("COLUMN") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("`ColUmn`") + require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised") + + normalised = d.NormaliseIdentifier("TaBle.`ColUmn`") + require.Equal(t, "table.`ColUmn`", normalised, "non quoted parts should be normalised") + + normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`") + require.Equal(t, "`Sh``EmA`.table.`ColUmn`", normalised, "non quoted parts should be normalised") + + }) + + t.Run("parse relation", func(t *testing.T) { + parsed, err := d.ParseRelationRef(`table`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("TABLE") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`") + require.NoError(t, err) + require.Equal(t, 
sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "schema", Name: "TaBle"}, parsed) + }) } diff --git a/sqlconnect/internal/databricks/dialect.go b/sqlconnect/internal/databricks/dialect.go index 2003de0..ead3fc3 100644 --- a/sqlconnect/internal/databricks/dialect.go +++ b/sqlconnect/internal/databricks/dialect.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/rudderlabs/sqlconnect-go/sqlconnect" + "github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base" ) type dialect struct{} @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string { func (d dialect) FormatTableName(name string) string { return strings.ToLower(name) } + +// NormaliseIdentifier normalises identifier parts that are unquoted, typically by lower or upper casing them, depending on the database +func (d dialect) NormaliseIdentifier(identifier string) string { + return base.NormaliseIdentifier(identifier, '`', strings.ToLower) +} + +// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes. +// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields. 
+func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) { + return base.ParseRelationRef(identifier, '`', strings.ToLower) +} diff --git a/sqlconnect/internal/databricks/dialect_test.go b/sqlconnect/internal/databricks/dialect_test.go index 33f9866..cc9a8bd 100644 --- a/sqlconnect/internal/databricks/dialect_test.go +++ b/sqlconnect/internal/databricks/dialect_test.go @@ -27,4 +27,44 @@ func TestDialect(t *testing.T) { quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema"))) require.Equal(t, "`schema`.`table`", quoted, "schema and table name should be quoted with backticks") }) + + t.Run("normalise identifier", func(t *testing.T) { + normalised := d.NormaliseIdentifier("column") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("COLUMN") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("`ColUmn`") + require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised") + + normalised = d.NormaliseIdentifier("TaBle.`ColUmn`") + require.Equal(t, "table.`ColUmn`", normalised, "non quoted parts should be normalised") + + normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`") + require.Equal(t, "`Sh``EmA`.table.`ColUmn`", normalised, "non quoted parts should be normalised") + + }) + + t.Run("parse relation", func(t *testing.T) { + parsed, err := d.ParseRelationRef(`table`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("TABLE") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`") + require.NoError(t, err) + 
require.Equal(t, sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "schema", Name: "TaBle"}, parsed) + }) } diff --git a/sqlconnect/internal/mysql/dialect.go b/sqlconnect/internal/mysql/dialect.go index 6ff3a02..900b329 100644 --- a/sqlconnect/internal/mysql/dialect.go +++ b/sqlconnect/internal/mysql/dialect.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/rudderlabs/sqlconnect-go/sqlconnect" + "github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base" ) type dialect struct{} @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string { func (d dialect) FormatTableName(name string) string { return strings.ToLower(name) } + +// NormaliseIdentifier normalises identifier parts that are unquoted, typically by lower or upper casing them, depending on the database +func (d dialect) NormaliseIdentifier(identifier string) string { + return base.NormaliseIdentifier(identifier, '`', strings.ToLower) +} + +// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes. +// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields. 
+func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) { + return base.ParseRelationRef(identifier, '`', strings.ToLower) +} diff --git a/sqlconnect/internal/mysql/dialect_test.go b/sqlconnect/internal/mysql/dialect_test.go index 28ffc84..a5a3064 100644 --- a/sqlconnect/internal/mysql/dialect_test.go +++ b/sqlconnect/internal/mysql/dialect_test.go @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) { quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema"))) require.Equal(t, "`schema`.`table`", quoted, "schema and table name should be quoted with backticks") }) + + t.Run("normalise identifier", func(t *testing.T) { + normalised := d.NormaliseIdentifier("column") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("COLUMN") + require.Equal(t, "column", normalised, "column name should be normalised to lowercase") + + normalised = d.NormaliseIdentifier("`ColUmn`") + require.Equal(t, "`ColUmn`", normalised, "quoted column name should not be normalised") + + normalised = d.NormaliseIdentifier("TaBle.`ColUmn`") + require.Equal(t, "table.`ColUmn`", normalised, "non quoted parts should be normalised") + + normalised = d.NormaliseIdentifier("`Sh``EmA`.TABLE.`ColUmn`") + require.Equal(t, "`Sh``EmA`.table.`ColUmn`", normalised, "non quoted parts should be normalised") + }) + + t.Run("parse relation", func(t *testing.T) { + parsed, err := d.ParseRelationRef(`table`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("TABLE") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "table"}, parsed) + + parsed, err = d.ParseRelationRef("`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("ScHeMA.`TaBle`") + require.NoError(t, err) + require.Equal(t, 
sqlconnect.RelationRef{Schema: "schema", Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef("`CaTa``LoG`.ScHeMA.`TaBle`") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa`LoG", Schema: "schema", Name: "TaBle"}, parsed) + }) } diff --git a/sqlconnect/internal/snowflake/dialect.go b/sqlconnect/internal/snowflake/dialect.go index 867056c..1bf58a8 100644 --- a/sqlconnect/internal/snowflake/dialect.go +++ b/sqlconnect/internal/snowflake/dialect.go @@ -4,6 +4,7 @@ import ( "strings" "github.com/rudderlabs/sqlconnect-go/sqlconnect" + "github.com/rudderlabs/sqlconnect-go/sqlconnect/internal/base" ) type dialect struct{} @@ -25,3 +26,14 @@ func (d dialect) QuoteIdentifier(name string) string { func (d dialect) FormatTableName(name string) string { return strings.ToUpper(name) } + +// NormaliseIdentifier normalises identifier parts that are unquoted, typically by lower or upper casing them, depending on the database +func (d dialect) NormaliseIdentifier(identifier string) string { + return base.NormaliseIdentifier(identifier, '"', strings.ToUpper) +} + +// ParseRelationRef parses a string into a RelationRef after normalising the identifier and stripping out surrounding quotes. +// The result is a RelationRef with case-sensitive fields, i.e. it can be safely quoted (see [QuoteTable]) and used for matching against the database's fields. 
+func (d dialect) ParseRelationRef(identifier string) (sqlconnect.RelationRef, error) { + return base.ParseRelationRef(identifier, '"', strings.ToUpper) +} diff --git a/sqlconnect/internal/snowflake/dialect_test.go b/sqlconnect/internal/snowflake/dialect_test.go index ec66800..d984d78 100644 --- a/sqlconnect/internal/snowflake/dialect_test.go +++ b/sqlconnect/internal/snowflake/dialect_test.go @@ -27,4 +27,43 @@ func TestDialect(t *testing.T) { quoted = d.QuoteTable(sqlconnect.NewRelationRef("table", sqlconnect.WithSchema("schema"))) require.Equal(t, `"schema"."table"`, quoted, "schema and table name should be quoted with double quotes") }) + + t.Run("normalise identifier", func(t *testing.T) { + normalised := d.NormaliseIdentifier("COLUMN") + require.Equal(t, "COLUMN", normalised, "column name should be normalised to uppercase") + + normalised = d.NormaliseIdentifier("column") + require.Equal(t, "COLUMN", normalised, "column name should be normalised to uppercase") + + normalised = d.NormaliseIdentifier(`"ColUmn"`) + require.Equal(t, `"ColUmn"`, normalised, "quoted column name should not be normalised") + + normalised = d.NormaliseIdentifier(`TaBle."ColUmn"`) + require.Equal(t, `TABLE."ColUmn"`, normalised, "non quoted parts should be normalised") + + normalised = d.NormaliseIdentifier(`"Sh""EmA".TABLE."ColUmn"`) + require.Equal(t, `"Sh""EmA".TABLE."ColUmn"`, normalised, "non quoted parts should be normalised") + }) + + t.Run("parse relation", func(t *testing.T) { + parsed, err := d.ParseRelationRef("TABLE") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "TABLE"}, parsed) + + parsed, err = d.ParseRelationRef("table") + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: "TABLE"}, parsed) + + parsed, err = d.ParseRelationRef(`"TaBle"`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Name: `TaBle`}, parsed) + + parsed, err = d.ParseRelationRef(`ScHeMA."TaBle"`) + require.NoError(t, err) + 
require.Equal(t, sqlconnect.RelationRef{Schema: "SCHEMA", Name: "TaBle"}, parsed) + + parsed, err = d.ParseRelationRef(`"CaTa""LoG".ScHeMA."TaBle"`) + require.NoError(t, err) + require.Equal(t, sqlconnect.RelationRef{Catalog: "CaTa\"LoG", Schema: "SCHEMA", Name: "TaBle"}, parsed) + }) }