From abd3df2e65bfe1ff1c0e4e6fcddd38ed577b5707 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Tue, 1 Oct 2024 12:04:00 +0800 Subject: [PATCH] Add in API for data contract cli support --- .../api/MetadataSourceBuilder.scala | 30 +++++++++++++- .../datacaterer/api/model/Constants.scala | 2 + .../api/model/MetadataSourceModels.scala | 8 +++- .../api/MetadataSourceBuilderTest.scala | 41 ++++++++++++++++++- gradle.properties | 2 +- 5 files changed, 77 insertions(+), 6 deletions(-) diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilder.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilder.scala index 5b27b8fa..9288f58b 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilder.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilder.scala @@ -1,9 +1,9 @@ package io.github.datacatering.datacaterer.api import io.github.datacatering.datacaterer.api.converter.Converters.toScalaMap -import io.github.datacatering.datacaterer.api.model.Constants.{DATA_CONTRACT_FILE, GREAT_EXPECTATIONS_FILE, METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} +import io.github.datacatering.datacaterer.api.model.Constants.{DATA_CONTRACT_FILE, DATA_CONTRACT_SCHEMA, GREAT_EXPECTATIONS_FILE, METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} import com.softwaremill.quicklens.ModifyPimp -import io.github.datacatering.datacaterer.api.model.{GreatExpectationsSource, MarquezMetadataSource, MetadataSource, OpenAPISource, OpenDataContractStandardSource, OpenMetadataSource} +import io.github.datacatering.datacaterer.api.model.{DataContractCliSource, GreatExpectationsSource, MarquezMetadataSource, MetadataSource, OpenAPISource, OpenDataContractStandardSource, OpenMetadataSource} case class MetadataSourceBuilder(metadataSource: MetadataSource = MarquezMetadataSource()) { def this() = this(MarquezMetadataSource()) @@ -79,4 +79,30 @@ case class MetadataSourceBuilder(metadataSource: MetadataSource = MarquezMetadat def openDataContractStandard(dataContractFile: String): MetadataSourceBuilder = { this.modify(_.metadataSource).setTo(OpenDataContractStandardSource(Map(DATA_CONTRACT_FILE -> dataContractFile))) } + + def openDataContractStandard(dataContractFile: String, schemaName: String): MetadataSourceBuilder = { + openDataContractStandard(dataContractFile, List(schemaName)) + } + + def openDataContractStandard(dataContractFile: String, schemaNames: List[String]): MetadataSourceBuilder = { + this.modify(_.metadataSource).setTo(OpenDataContractStandardSource(Map( + DATA_CONTRACT_FILE -> dataContractFile, + DATA_CONTRACT_SCHEMA -> schemaNames.mkString(",") + ))) + } + + def dataContractCli(dataContractFile: String): MetadataSourceBuilder = { + this.modify(_.metadataSource).setTo(DataContractCliSource(Map(DATA_CONTRACT_FILE -> dataContractFile))) + } + + def dataContractCli(dataContractFile: String, modelName: String): MetadataSourceBuilder = { + dataContractCli(dataContractFile, List(modelName)) + } + + def dataContractCli(dataContractFile: String, modelNames: List[String]): MetadataSourceBuilder = { + this.modify(_.metadataSource).setTo(DataContractCliSource(Map( + DATA_CONTRACT_FILE -> dataContractFile, + DATA_CONTRACT_SCHEMA -> modelNames.mkString(",") + ))) + } } diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/Constants.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/Constants.scala index 21146af0..344655d7 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/Constants.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/Constants.scala @@ -55,6 +55,7 @@ object Constants { lazy val SCHEMA_LOCATION = "schemaLocation" lazy val GREAT_EXPECTATIONS_FILE = "expectationsFile" lazy val DATA_CONTRACT_FILE = "dataContractFile" + lazy val DATA_CONTRACT_SCHEMA = "dataContractSchema" lazy val ROWS_PER_SECOND = "rowsPerSecond" lazy val HUDI_TABLE_NAME = "hoodie.table.name" lazy val ICEBERG_CATALOG_TYPE = "catalogType" @@ -318,6 +319,7 @@ object Constants { lazy val OPEN_API = "openApi" lazy val GREAT_EXPECTATIONS = "greatExpectations" lazy val OPEN_DATA_CONTRACT_STANDARD = "openDataContractStandard" + lazy val DATA_CONTRACT_CLI = "dataContractCli" lazy val AMUNDSEN = "amundsen" lazy val DATAHUB = "datahub" lazy val DEFAULT_METADATA_SOURCE_NAME = "defaultMetadataSource" diff --git a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/MetadataSourceModels.scala b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/MetadataSourceModels.scala index a523cef5..d3f33bb3 100644 --- a/api/src/main/scala/io/github/datacatering/datacaterer/api/model/MetadataSourceModels.scala +++ b/api/src/main/scala/io/github/datacatering/datacaterer/api/model/MetadataSourceModels.scala @@ -1,6 +1,6 @@ package io.github.datacatering.datacaterer.api.model -import Constants.{GREAT_EXPECTATIONS, MARQUEZ, METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT, METADATA_SOURCE_TYPE, OPEN_API, OPEN_DATA_CONTRACT_STANDARD, OPEN_METADATA} +import Constants.{DATA_CONTRACT_CLI, GREAT_EXPECTATIONS, MARQUEZ, METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT, METADATA_SOURCE_TYPE, OPEN_API, OPEN_DATA_CONTRACT_STANDARD, OPEN_METADATA} trait MetadataSource { @@ -43,3 +43,9 @@ case class OpenDataContractStandardSource(override val connectionOptions: Map[St override val `type`: String = OPEN_DATA_CONTRACT_STANDARD } + +case class DataContractCliSource(override val connectionOptions: Map[String, String] = Map()) extends MetadataSource { + + override val `type`: String = DATA_CONTRACT_CLI + +} diff --git a/api/src/test/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilderTest.scala b/api/src/test/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilderTest.scala index da15745c..2e019291 100644 --- a/api/src/test/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilderTest.scala +++ b/api/src/test/scala/io/github/datacatering/datacaterer/api/MetadataSourceBuilderTest.scala @@ -1,7 +1,7 @@ package io.github.datacatering.datacaterer.api -import io.github.datacatering.datacaterer.api.model.Constants.{DATA_CONTRACT_FILE, GREAT_EXPECTATIONS_FILE, METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_BASIC, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_BASIC_AUTH_PASSWORD, OPEN_METADATA_BASIC_AUTH_USERNAME, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} -import io.github.datacatering.datacaterer.api.model.{GreatExpectationsSource, MarquezMetadataSource, OpenAPISource, OpenDataContractStandardSource, OpenMetadataSource} +import io.github.datacatering.datacaterer.api.model.Constants.{DATA_CONTRACT_FILE, DATA_CONTRACT_SCHEMA, GREAT_EXPECTATIONS_FILE, METADATA_SOURCE_URL, OPEN_LINEAGE_DATASET, OPEN_LINEAGE_NAMESPACE, OPEN_METADATA_API_VERSION, OPEN_METADATA_AUTH_TYPE, OPEN_METADATA_AUTH_TYPE_BASIC, OPEN_METADATA_AUTH_TYPE_OPEN_METADATA, OPEN_METADATA_BASIC_AUTH_PASSWORD, OPEN_METADATA_BASIC_AUTH_USERNAME, OPEN_METADATA_DEFAULT_API_VERSION, OPEN_METADATA_HOST, OPEN_METADATA_JWT_TOKEN, SCHEMA_LOCATION} +import io.github.datacatering.datacaterer.api.model.{DataContractCliSource, GreatExpectationsSource, MarquezMetadataSource, OpenAPISource, OpenDataContractStandardSource, OpenMetadataSource} import org.junit.runner.RunWith import org.scalatest.funsuite.AnyFunSuite import org.scalatestplus.junit.JUnitRunner @@ -66,4 +66,41 @@ class MetadataSourceBuilderTest extends AnyFunSuite { assert(result.asInstanceOf[OpenDataContractStandardSource].connectionOptions == Map(DATA_CONTRACT_FILE -> "/tmp/odcs")) } + test("Can create Open Data Contract Standard metadata source with schema name") { + val result = MetadataSourceBuilder().openDataContractStandard("/tmp/odcs", "accounts").metadataSource + + assert(result.isInstanceOf[OpenDataContractStandardSource]) + assert(result.asInstanceOf[OpenDataContractStandardSource].connectionOptions == Map( + DATA_CONTRACT_FILE -> "/tmp/odcs", + DATA_CONTRACT_SCHEMA -> "accounts" + )) + } + + test("Can create Data Contract CLI metadata source") { + val result = MetadataSourceBuilder().dataContractCli("/tmp/datacli").metadataSource + + assert(result.isInstanceOf[DataContractCliSource]) + assert(result.asInstanceOf[DataContractCliSource].connectionOptions == Map(DATA_CONTRACT_FILE -> "/tmp/datacli")) + } + + test("Can create Data Contract CLI metadata source with schema name") { + val result = MetadataSourceBuilder().dataContractCli("/tmp/datacli", "accounts").metadataSource + + assert(result.isInstanceOf[DataContractCliSource]) + assert(result.asInstanceOf[DataContractCliSource].connectionOptions == Map( + DATA_CONTRACT_FILE -> "/tmp/datacli", + DATA_CONTRACT_SCHEMA -> "accounts" + )) + } + + test("Can create Data Contract CLI metadata source with multiple schema names") { + val result = MetadataSourceBuilder().dataContractCli("/tmp/datacli", List("accounts", "balances")).metadataSource + + assert(result.isInstanceOf[DataContractCliSource]) + assert(result.asInstanceOf[DataContractCliSource].connectionOptions == Map( + DATA_CONTRACT_FILE -> "/tmp/datacli", + DATA_CONTRACT_SCHEMA -> "accounts,balances" + )) + } + } diff --git a/gradle.properties b/gradle.properties index 99420600..36d7718e 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,5 +1,5 @@ groupId=io.github.data-catering -version=0.11.10 +version=0.11.11 scalaVersion=2.12 scalaSpecificVersion=2.12.19