Skip to content

Commit

Permalink
Add backfill generation methods (#73)
Browse files Browse the repository at this point in the history
## Scope

Part of #61

This PR adds the method that generates backfill queries for the MS SQL source
  • Loading branch information
s-vitaliy authored Nov 4, 2024
1 parent b27b6da commit 35a58e5
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* Backfill template: reads every row of the source table (no WHERE clause).
   Tokens wrapped in curly braces are placeholders that the caller substitutes
   before the statement is executed. */
DECLARE @currentVersion bigint = CHANGE_TRACKING_CURRENT_VERSION()

SELECT
    {ChangeTrackingColumnsStatement},
    /* the version captured above is stamped on every returned row */
    @currentVersion AS [ChangeTrackingVersion],
    /* merge key: lower-case hex text of the SHA2_256 hash of the merge expression */
    LOWER(CONVERT(nvarchar(128), HASHBYTES('SHA2_256', {MERGE_EXPRESSION}), 2)) AS [{MERGE_KEY}]
FROM [{dbName}].[{schema}].[{tableName}] AS tq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/* Backfill template, date-partitioned variant: reads every row of the source
   table (no WHERE clause) and adds one extra column computed from the
   partition expression. Tokens wrapped in curly braces are placeholders that
   the caller substitutes before the statement is executed. */
DECLARE @currentVersion bigint = CHANGE_TRACKING_CURRENT_VERSION()

SELECT
    {ChangeTrackingColumnsStatement},
    /* the version captured above is stamped on every returned row */
    @currentVersion AS [ChangeTrackingVersion],
    /* merge key: lower-case hex text of the SHA2_256 hash of the merge expression */
    LOWER(CONVERT(nvarchar(128), HASHBYTES('SHA2_256', {MERGE_EXPRESSION}), 2)) AS [{MERGE_KEY}],
    /* partition column */
    {DATE_PARTITION_EXPRESSION} AS [{DATE_PARTITION_KEY}]
FROM [{dbName}].[{schema}].[{tableName}] AS tq
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,17 @@ object QueryProvider:
.replace("{schema}", schemaName)
.replace("{table}", tableName)

/**
 * Generates the backfill (select-all) query for the table behind the given connection.
 *
 * Column summaries are fetched from the connection; once available they are turned into
 * a merge expression and a column list, which are passed to `getAllQuery` together with
 * the connection options.
 *
 * NOTE(review): the column list is built with the "ct" alias, while the select-all
 * templates only declare the `tq` alias in their FROM clause - confirm that the
 * generated statement does not reference an unbound `ct`.
 *
 * @param msSqlConnection connection supplying the column summaries and connection options
 * @return a future holding the generated query
 */
def getBackfillQuery(msSqlConnection: MsSqlConnection): Future[MsSqlQuery] =
  for (summaries <- msSqlConnection.getColumnSummaries) yield {
    val keyHashInput = QueryProvider.getMergeExpression(summaries, "tq")
    val selectedColumns = QueryProvider.getChangeTrackingColumns(summaries, "ct", "tq")
    QueryProvider.getAllQuery(msSqlConnection.connectionOptions, keyHashInput, selectedColumns)
  }

private def getMergeExpression(cs: List[ColumnSummary], tableAlias: String): String =
cs.filter((name, isPrimaryKey) => isPrimaryKey)
.map((name, _) => s"cast($tableAlias.[$name] as nvarchar(128))")
Expand Down Expand Up @@ -252,3 +263,23 @@ object QueryProvider:
.replace("{DATE_PARTITION_EXPRESSION}", connectionOptions.partitionExpression.getOrElse(""))
.replace("{DATE_PARTITION_KEY}", DATE_PARTITION_KEY)
.replace("{lastId}", changeTrackingId.toString)

/**
 * Builds the select-all (backfill) query text from a SQL template resource.
 *
 * The date-partitioned template is chosen when `connectionOptions.partitionExpression`
 * is defined, the plain template otherwise. Every placeholder in the template is then
 * replaced with the matching value.
 *
 * @param connectionOptions supplies the database, schema and table names and the optional
 *                          partition expression
 * @param mergeExpression   text substituted for the `{MERGE_EXPRESSION}` placeholder
 * @param columnExpression  text substituted for the `{ChangeTrackingColumnsStatement}` placeholder
 * @return the template text with all placeholders substituted
 */
private def getAllQuery(connectionOptions: ConnectionOptions,
                        mergeExpression: String,
                        columnExpression: String): String = {

  val templateName = connectionOptions.partitionExpression match {
    case Some(_) => "get_select_all_query_date_partitioned.sql"
    case None    => "get_select_all_query.sql"
  }

  // Using.resource closes the Source (and the stream behind it) once the template has been
  // read, also when reading throws; a bare Source.fromResource(...) would stay open.
  val baseQuery = scala.util.Using.resource(Source.fromResource(templateName)) { template =>
    template.getLines().mkString("\n")
  }

  baseQuery
    .replace("{dbName}", connectionOptions.databaseName)
    .replace("{schema}", connectionOptions.schemaName)
    .replace("{tableName}", connectionOptions.tableName)
    .replace("{ChangeTrackingColumnsStatement}", columnExpression)
    .replace("{MERGE_EXPRESSION}", mergeExpression)
    .replace("{MERGE_KEY}", UPSERT_MERGE_KEY)
    // The plain template has no partition placeholders, so these two are no-ops for it.
    .replace("{DATE_PARTITION_EXPRESSION}", connectionOptions.partitionExpression.getOrElse(""))
    .replace("{DATE_PARTITION_KEY}", DATE_PARTITION_KEY)
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ class MsSqlConnectorsTests extends flatspec.AsyncFlatSpec with Matchers:
}
}

"QueryProvider" should "generate backfill query" in withDatabase { dbInfo =>
val connector = MsSqlConnection(dbInfo.connectionOptions)
QueryProvider.getBackfillQuery(connector) map { query =>
query should (
include ("ct.SYS_CHANGE_VERSION") and include ("ARCANE_MERGE_KEY") and include("format(getdate(), 'yyyyMM')")
)
}
}

"MsSqlConnection" should "be able to extract schema column names from the database" in withDatabase { dbInfo =>
val connection = MsSqlConnection(dbInfo.connectionOptions)
connection.getSchema map { schema =>
Expand Down

0 comments on commit 35a58e5

Please sign in to comment.