From 930afaea992c285f707b519f54f1d424e4b4236e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lovro=20Ma=C5=BEgon?= Date: Mon, 15 Jul 2024 20:01:13 +0200 Subject: [PATCH] Schema support (#1702) --- go.mod | 14 +- go.sum | 20 +- pkg/conduit/config.go | 98 ++- pkg/conduit/config_test.go | 12 +- pkg/conduit/entrypoint.go | 3 + pkg/conduit/runtime.go | 222 ++++-- pkg/conduit/runtime_test.go | 13 +- pkg/orchestrator/orchestrator_test.go | 5 +- pkg/plugin/connector/builtin/registry.go | 46 +- pkg/plugin/connector/connutils/schema.go | 96 +++ .../processor/builtin/impl/avro/config.go | 10 +- .../processor/builtin/impl/avro/decode.go | 14 +- .../builtin/impl/avro/decode_examples_test.go | 7 +- .../builtin/impl/avro/decode_paramgen.go | 24 +- .../processor/builtin/impl/avro/encode.go | 12 +- .../builtin/impl/avro/encode_examples_test.go | 9 +- .../builtin/impl/avro/encode_paramgen.go | 36 +- .../builtin/impl/avro/internal/decoder.go | 59 ++ .../builtin/impl/avro/internal/encoder.go | 103 +++ .../encoder_test.go | 42 +- .../avro/schemaregistry/avro/extractor.go | 388 ----------- .../impl/avro/schemaregistry/avro/schema.go | 100 --- .../avro/schemaregistry/avro/schema_test.go | 658 ------------------ .../impl/avro/schemaregistry/avro/traverse.go | 193 ----- .../impl/avro/schemaregistry/avro/union.go | 482 ------------- .../avro/schemaregistry/avro/union_test.go | 155 ----- .../impl/avro/schemaregistry/decoder.go | 90 --- .../impl/avro/schemaregistry/encoder.go | 133 ---- .../builtin/impl/avro/schemaregistry/fake.go | 400 ----------- .../avro/schemaregistry/internal/cache.go | 91 --- .../schemaregistry/internal/cache_test.go | 186 ----- .../avro/schemaregistry/internal/rabin.go | 49 -- .../impl/avro/schemaregistry/schema.go | 45 -- .../builtin/impl/base64/decode_paramgen.go | 6 +- .../builtin/impl/base64/encode_paramgen.go | 6 +- .../impl/custom/javascript_paramgen.go | 9 +- .../processor/builtin/impl/error_paramgen.go | 6 +- .../builtin/impl/field/convert_paramgen.go | 9 +- 
.../builtin/impl/field/exclude_paramgen.go | 6 +- .../builtin/impl/field/rename_paramgen.go | 6 +- .../builtin/impl/field/set_paramgen.go | 9 +- .../builtin/impl/json/decode_paramgen.go | 6 +- .../builtin/impl/json/encode_paramgen.go | 6 +- .../builtin/impl/unwrap/debezium_paramgen.go | 6 +- .../impl/unwrap/kafka_connect_paramgen.go | 6 +- .../builtin/impl/unwrap/opencdc_paramgen.go | 6 +- .../builtin/impl/webhook/http_paramgen.go | 36 +- pkg/plugin/processor/builtin/registry.go | 4 +- pkg/provisioning/service_test.go | 5 +- .../impl/avro => }/schemaregistry/client.go | 96 ++- .../avro => }/schemaregistry/client_test.go | 13 +- pkg/schemaregistry/fromschema/sr.go | 48 ++ .../registry.go} | 32 +- .../schemaregistrytest/confluent.go} | 4 +- .../schemaregistrytest/inmemory.go | 93 +++ pkg/schemaregistry/toschema/sr.go | 56 ++ 56 files changed, 1010 insertions(+), 3279 deletions(-) create mode 100644 pkg/plugin/connector/connutils/schema.go create mode 100644 pkg/plugin/processor/builtin/impl/avro/internal/decoder.go create mode 100644 pkg/plugin/processor/builtin/impl/avro/internal/encoder.go rename pkg/plugin/processor/builtin/impl/avro/{schemaregistry => internal}/encoder_test.go (72%) delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/extractor.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema_test.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/traverse.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union_test.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/decoder.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/fake.go 
delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache_test.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin.go delete mode 100644 pkg/plugin/processor/builtin/impl/avro/schemaregistry/schema.go rename pkg/{plugin/processor/builtin/impl/avro => }/schemaregistry/client.go (51%) rename pkg/{plugin/processor/builtin/impl/avro => }/schemaregistry/client_test.go (95%) create mode 100644 pkg/schemaregistry/fromschema/sr.go rename pkg/{plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin_test.go => schemaregistry/registry.go} (54%) rename pkg/{plugin/processor/builtin/impl/avro/schemaregistry/integration.go => schemaregistry/schemaregistrytest/confluent.go} (95%) create mode 100644 pkg/schemaregistry/schemaregistrytest/inmemory.go create mode 100644 pkg/schemaregistry/toschema/sr.go diff --git a/go.mod b/go.mod index 30673aed0..1de45ac12 100644 --- a/go.mod +++ b/go.mod @@ -8,16 +8,17 @@ require ( github.com/Masterminds/sprig/v3 v3.2.3 github.com/NYTimes/gziphandler v1.1.1 github.com/bufbuild/buf v1.34.0 - github.com/conduitio/conduit-commons v0.2.0 + github.com/conduitio/conduit-commons v0.2.1-0.20240708122218-5d1883981cfc github.com/conduitio/conduit-connector-file v0.6.1-0.20240621111422-221c138201d3 github.com/conduitio/conduit-connector-generator v0.6.1-0.20240621111436-e9fa3464f7b2 github.com/conduitio/conduit-connector-kafka v0.8.1-0.20240621111431-87c01cf39a06 github.com/conduitio/conduit-connector-log v0.3.1-0.20240621111440-e2f0f04a35a4 github.com/conduitio/conduit-connector-postgres v0.7.6-0.20240630172132-84b5a6e6104f - github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240619121958-1df466646d01 + github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240705154009-b938cfa7f251 github.com/conduitio/conduit-connector-s3 v0.5.2-0.20240630172807-e278fde1fb46 - 
github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240628152134-4cafa91a4ded + github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240705162050-971c5f7facc2 github.com/conduitio/conduit-processor-sdk v0.1.2-0.20240516124003-442e4a3f0edd + github.com/conduitio/conduit-schema-registry v0.0.0-20240705193355-7e2064b44e0d github.com/conduitio/yaml/v3 v3.3.0 github.com/dgraph-io/badger/v4 v4.2.0 github.com/dop251/goja v0.0.0-20231027120936-b396bb4c349d @@ -29,14 +30,12 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.3 github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 - github.com/hamba/avro/v2 v2.22.1 github.com/hashicorp/go-hclog v1.6.3 github.com/hashicorp/go-plugin v1.6.1 github.com/jackc/pgx/v5 v5.6.0 github.com/jpillora/backoff v1.0.0 - github.com/lovromazgon/franz-go/pkg/sr v0.0.0-20230630140346-bb9ce3f90f4a github.com/matryer/is v1.4.1 - github.com/modern-go/reflect2 v1.0.2 + github.com/neilotoole/slogt v1.1.0 github.com/peterbourgon/ff/v3 v3.4.0 github.com/piotrkowalczuk/promgrpc/v4 v4.1.3 github.com/prometheus/client_golang v1.19.1 @@ -45,6 +44,7 @@ require ( github.com/rs/zerolog v1.33.0 github.com/stealthrocket/wazergo v0.19.1 github.com/tetratelabs/wazero v1.7.3 + github.com/twmb/franz-go/pkg/sr v1.0.0 github.com/twmb/go-cache v1.2.1 go.uber.org/mock v0.4.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 @@ -187,6 +187,7 @@ require ( github.com/gostaticanalysis/comment v1.4.2 // indirect github.com/gostaticanalysis/forcetypeassert v0.1.0 // indirect github.com/gostaticanalysis/nilerr v0.1.1 // indirect + github.com/hamba/avro/v2 v2.22.1 // indirect github.com/hashicorp/go-version v1.7.0 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -240,6 +241,7 @@ require ( github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/term v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + 
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/moricho/tparallel v0.3.1 // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect diff --git a/go.sum b/go.sum index 8ea6882c8..749549565 100644 --- a/go.sum +++ b/go.sum @@ -211,8 +211,8 @@ github.com/ckaznocha/intrange v0.1.2/go.mod h1:RWffCw/vKBwHeOEwWdCikAtY0q4gGt8Vh github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= -github.com/conduitio/conduit-commons v0.2.0 h1:TMpVGXi0Wski537qLAyQWdGjuGHEhaZxOS5L90pZJSQ= -github.com/conduitio/conduit-commons v0.2.0/go.mod h1:i7Q2jm7FBSi2zj1/4MCsFD1hIKAbvamlNtSQfkhUTiY= +github.com/conduitio/conduit-commons v0.2.1-0.20240708122218-5d1883981cfc h1:q2o3R/cXbQBUrJFBHV1YietwB275rBjkmPu1njzV4bk= +github.com/conduitio/conduit-commons v0.2.1-0.20240708122218-5d1883981cfc/go.mod h1:oF0KZc+TiVGBdKoMLLQ9Otb83Shq4CRZ2hClhNlFRio= github.com/conduitio/conduit-connector-file v0.6.1-0.20240621111422-221c138201d3 h1:/mdy7vQzdfqDFLM13M39CYwI6Pk7xClMVZpGQW3+5DQ= github.com/conduitio/conduit-connector-file v0.6.1-0.20240621111422-221c138201d3/go.mod h1:bCnmA+29l871cNhroZfiCS2O8+GhBNVECfL5DOof2ew= github.com/conduitio/conduit-connector-generator v0.6.1-0.20240621111436-e9fa3464f7b2 h1:WMKvmvaE/E+03/0nz/2JpyelCd2nPtOTuBy3eyWcI58= @@ -223,14 +223,16 @@ github.com/conduitio/conduit-connector-log v0.3.1-0.20240621111440-e2f0f04a35a4 github.com/conduitio/conduit-connector-log v0.3.1-0.20240621111440-e2f0f04a35a4/go.mod h1:6IkveRPUPJDCtdH6vXOW1T+B8Vj99OA+szybqYSnlyY= github.com/conduitio/conduit-connector-postgres v0.7.6-0.20240630172132-84b5a6e6104f h1:p8CH8UlYkOSlqOREJtUW9eHm6fyn3M+5b0lUQByMVvg= github.com/conduitio/conduit-connector-postgres 
v0.7.6-0.20240630172132-84b5a6e6104f/go.mod h1:2v+hTwyTZFjM9evlMv6Id9M/rVuCZgzUnA3szRnWOiI= -github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240619121958-1df466646d01 h1:sZA0aZpZlleULAu+KQYL+WAapXdJNzV3XnSJmwAF0Mg= -github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240619121958-1df466646d01/go.mod h1:3R3eUxN/Z3O3jR1TcfFb9zeGWpiDLvpSOlSWUVa8KsI= +github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240705154009-b938cfa7f251 h1:X/dY6GJ8PxIDPgqpWO0bZqBoHrBUVA+8x//tO50PQMk= +github.com/conduitio/conduit-connector-protocol v0.6.1-0.20240705154009-b938cfa7f251/go.mod h1:LDGRw1uphxd0MNaF9NbLUbFwoJWS+GehsX4eQYau6f4= github.com/conduitio/conduit-connector-s3 v0.5.2-0.20240630172807-e278fde1fb46 h1:tur/pSyX1RLzkxiBwhsV1qa6wP60pb20hJMptH5RRJY= github.com/conduitio/conduit-connector-s3 v0.5.2-0.20240630172807-e278fde1fb46/go.mod h1:m+pf2cMF+qCwhMj9gUBV1BPGLPYauhtYkj2zFddfvdE= -github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240628152134-4cafa91a4ded h1:qsc8PuCkWY5G4IeQW1fZQOVWUzB8DoVzFat41BPYf68= -github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240628152134-4cafa91a4ded/go.mod h1:hCmuIMKtYqFnLZWNK343dtQEZJIp+wv/0Qck9N+q+oY= +github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240705162050-971c5f7facc2 h1:L7RrVxEn7qlAzDKzSO+bu0N371MwVlwx3SOdnCiYSfU= +github.com/conduitio/conduit-connector-sdk v0.9.2-0.20240705162050-971c5f7facc2/go.mod h1:8gTzxxOZ8tRf7XvWxIDviLLcOPa3oXlVueleYYTZzNs= github.com/conduitio/conduit-processor-sdk v0.1.2-0.20240516124003-442e4a3f0edd h1:R+tpcZKWOnr6LRsXr85C167SK9MhaLhYUEjBSUupU9Y= github.com/conduitio/conduit-processor-sdk v0.1.2-0.20240516124003-442e4a3f0edd/go.mod h1:E9zqj0atY1+yBHWi4eZ3TagCZSBnFxBQBUcZktL6RFE= +github.com/conduitio/conduit-schema-registry v0.0.0-20240705193355-7e2064b44e0d h1:C6wRzdyqdQQCL/lCruAsH0j1JoN2GEZBQdDHoxA2B0o= +github.com/conduitio/conduit-schema-registry v0.0.0-20240705193355-7e2064b44e0d/go.mod h1:bw7SeE1nFhPIxDo5PcmjZySvh7f4iQkNWS/K5CBkGrg= github.com/conduitio/yaml/v3 v3.3.0 
h1:kbbaOSHcuH39gP4+rgbJGl6DSbLZcJgEaBvkEXJlCsI= github.com/conduitio/yaml/v3 v3.3.0/go.mod h1:JNgFMOX1t8W4YJuRZOh6GggVtSMsgP9XgTw+7dIenpc= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= @@ -592,8 +594,6 @@ github.com/ldez/tagliatelle v0.5.0/go.mod h1:rj1HmWiL1MiKQuOONhd09iySTEkUuE/8+5j github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/leonklingele/grouper v1.1.2 h1:o1ARBDLOmmasUaNDesWqWCIFH3u7hoFlM84YrjT3mIY= github.com/leonklingele/grouper v1.1.2/go.mod h1:6D0M/HVkhs2yRKRFZUoGjeDy7EZTfFBE9gl4kjmIGkA= -github.com/lovromazgon/franz-go/pkg/sr v0.0.0-20230630140346-bb9ce3f90f4a h1:TrxQUmJBE1pZsnTW3rqG5Fsx3Xz0wGm5xgqLDV/mMGk= -github.com/lovromazgon/franz-go/pkg/sr v0.0.0-20230630140346-bb9ce3f90f4a/go.mod h1:iz9EnaFViALD6sVqxYHs8BPC0ZEQtfhTpN7SG5b0Nqo= github.com/lufeee/execinquery v1.2.1 h1:hf0Ems4SHcUGBxpGN7Jz78z1ppVkP/837ZlETPCEtOM= github.com/lufeee/execinquery v1.2.1/go.mod h1:EC7DrEKView09ocscGHC+apXMIaorh4xqSxS/dy8SbM= github.com/macabu/inamedparam v0.1.3 h1:2tk/phHkMlEL/1GNe/Yf6kkR/hkcUdAEY3L0hjYV1Mk= @@ -658,6 +658,8 @@ github.com/nakabonne/nestif v0.3.1/go.mod h1:9EtoZochLn5iUprVDmDjqGKPofoUEBL8U4N github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ncw/swift v1.0.52/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM= +github.com/neilotoole/slogt v1.1.0 h1:c7qE92sq+V0yvCuaxph+RQ2jOKL61c4hqS1Bv9W7FZE= +github.com/neilotoole/slogt v1.1.0/go.mod h1:RCrGXkPc/hYybNulqQrMHRtvlQ7F6NktNVLuLwk6V+w= github.com/nishanths/exhaustive v0.12.0 h1:vIY9sALmw6T/yxiASewa4TQcFsVYZQQRUQJhKRf3Swg= github.com/nishanths/exhaustive v0.12.0/go.mod h1:mEZ95wPIZW+x8kC4TgC+9YCUgiST7ecevsVDTgc2obs= github.com/nishanths/predeclared v0.2.2 h1:V2EPdZPliZymNAn79T8RkNApBjMmVKh5XRpLm/w98Vk= @@ -847,6 +849,8 @@ github.com/twmb/franz-go/pkg/kadm 
v1.12.0 h1:I8P/gpXFzhl73QcAYmJu+1fOXvrynyH/MAo github.com/twmb/franz-go/pkg/kadm v1.12.0/go.mod h1:VMvpfjz/szpH9WB+vGM+rteTzVv0djyHFimci9qm2C0= github.com/twmb/franz-go/pkg/kmsg v1.8.0 h1:lAQB9Z3aMrIP9qF9288XcFf/ccaSxEitNA1CDTEIeTA= github.com/twmb/franz-go/pkg/kmsg v1.8.0/go.mod h1:HzYEb8G3uu5XevZbtU0dVbkphaKTHk0X68N5ka4q6mU= +github.com/twmb/franz-go/pkg/sr v1.0.0 h1:4FUatTSTEuG2xievT0iDrgnpErgRg7kFLNioJYqfrqs= +github.com/twmb/franz-go/pkg/sr v1.0.0/go.mod h1:aUFRRLI5WYKpKzmWDztzZFecx5eOkCNuuamd91jUV5c= github.com/twmb/go-cache v1.2.1 h1:yUkLutow4S2x5NMbqFW24o14OsucoFI5Fzmlb6uBinM= github.com/twmb/go-cache v1.2.1/go.mod h1:lArg9KhCl+GTFMikitLGhIBh/i11OK0lhSveqlMbbrY= github.com/ultraware/funlen v0.1.0 h1:BuqclbkY6pO+cvxoq7OsktIXZpgBSkYTQtmwhAK81vI= diff --git a/pkg/conduit/config.go b/pkg/conduit/config.go index 280faea09..71b72257c 100644 --- a/pkg/conduit/config.go +++ b/pkg/conduit/config.go @@ -17,6 +17,7 @@ package conduit import ( "os" + sdk "github.com/conduitio/conduit-connector-sdk" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/database" "github.com/conduitio/conduit/pkg/foundation/log" @@ -29,6 +30,9 @@ const ( DBTypePostgres = "postgres" DBTypeInMemory = "inmemory" DBTypeSQLite = "sqlite" + + SchemaRegistryTypeConfluent = "confluent" + SchemaRegistryTypeBuiltin = "builtin" ) // Config holds all configurable values for Conduit. 
@@ -81,18 +85,26 @@ type Config struct { ExitOnError bool } - PluginDispenserFactories map[string]builtin.DispenserFactory + ConnectorPlugins map[string]sdk.Connector dev struct { cpuprofile string memprofile string blockprofile string } + + SchemaRegistry struct { + Type string + + Confluent struct { + ConnectionString string + } + } } func DefaultConfig() Config { var cfg Config - cfg.DB.Type = "badger" + cfg.DB.Type = DBTypeBadger cfg.DB.Badger.Path = "conduit.db" cfg.DB.Postgres.Table = "conduit_kv_store" cfg.DB.SQLite.Path = "conduit.db" @@ -105,11 +117,56 @@ func DefaultConfig() Config { cfg.Connectors.Path = "./connectors" cfg.Processors.Path = "./processors" cfg.Pipelines.Path = "./pipelines" + cfg.SchemaRegistry.Type = SchemaRegistryTypeBuiltin - cfg.PluginDispenserFactories = builtin.DefaultDispenserFactories + cfg.ConnectorPlugins = builtin.DefaultBuiltinConnectors return cfg } +func (c Config) validateDBConfig() error { + if c.DB.Driver == nil { + switch c.DB.Type { + case DBTypeBadger: + if c.DB.Badger.Path == "" { + return requiredConfigFieldErr("db.badger.path") + } + case DBTypePostgres: + if c.DB.Postgres.ConnectionString == "" { + return requiredConfigFieldErr("db.postgres.connection-string") + } + if c.DB.Postgres.Table == "" { + return requiredConfigFieldErr("db.postgres.table") + } + case DBTypeInMemory: + // all good + case DBTypeSQLite: + if c.DB.SQLite.Path == "" { + return requiredConfigFieldErr("db.sqlite.path") + } + if c.DB.SQLite.Table == "" { + return requiredConfigFieldErr("db.sqlite.table") + } + default: + return invalidConfigFieldErr("db.type") + } + } + return nil +} + +func (c Config) validateSchemaRegistryConfig() error { + switch c.SchemaRegistry.Type { + case SchemaRegistryTypeConfluent: + if c.SchemaRegistry.Confluent.ConnectionString == "" { + return requiredConfigFieldErr("schema-registry.confluent.connection-string") + } + case SchemaRegistryTypeBuiltin: + // all good + default: + return 
invalidConfigFieldErr("schema-registry.type") + } + return nil +} + func (c Config) Validate() error { // TODO simplify validation with struct tags @@ -117,6 +174,10 @@ func (c Config) Validate() error { return err } + if err := c.validateSchemaRegistryConfig(); err != nil { + return err + } + if c.API.Enabled { if c.API.GRPC.Address == "" { return requiredConfigFieldErr("grpc.address") @@ -154,37 +215,6 @@ func (c Config) Validate() error { return nil } -func (c Config) validateDBConfig() error { - if c.DB.Driver == nil { - switch c.DB.Type { - case DBTypeBadger: - if c.DB.Badger.Path == "" { - return requiredConfigFieldErr("db.badger.path") - } - case DBTypePostgres: - if c.DB.Postgres.ConnectionString == "" { - return requiredConfigFieldErr("db.postgres.connection-string") - } - if c.DB.Postgres.Table == "" { - return requiredConfigFieldErr("db.postgres.table") - } - case DBTypeInMemory: - // all good - case DBTypeSQLite: - if c.DB.SQLite.Path == "" { - return requiredConfigFieldErr("db.sqlite.path") - } - if c.DB.SQLite.Table == "" { - return requiredConfigFieldErr("db.sqlite.table") - } - default: - return invalidConfigFieldErr("db.type") - } - } - - return nil -} - func invalidConfigFieldErr(name string) error { return cerrors.Errorf("%q config value is invalid", name) } diff --git a/pkg/conduit/config_test.go b/pkg/conduit/config_test.go index 156c9ffac..43b890733 100644 --- a/pkg/conduit/config_test.go +++ b/pkg/conduit/config_test.go @@ -149,18 +149,8 @@ func TestConfig_Validate(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - var validConfig Config - validConfig.DB.Type = DBTypeInMemory - validConfig.DB.Badger.Path = "conduit.app" - validConfig.DB.Postgres.Table = "conduit_kv_store" + validConfig := DefaultConfig() validConfig.DB.Postgres.ConnectionString = "postgres://user:pass@localhost:5432/mydb?sslmode=disable" - validConfig.API.Enabled = true - validConfig.API.HTTP.Address = ":8080" - validConfig.API.GRPC.Address = 
":8084" - validConfig.Log.Level = "info" - validConfig.Log.Format = "cli" - validConfig.Pipelines.Path = "./pipelines" - underTest := tc.setupConfig(validConfig) got := underTest.Validate() if got == nil { diff --git a/pkg/conduit/entrypoint.go b/pkg/conduit/entrypoint.go index 2b4771c1f..145162f6d 100644 --- a/pkg/conduit/entrypoint.go +++ b/pkg/conduit/entrypoint.go @@ -96,6 +96,9 @@ func (*Entrypoint) Flags(cfg *Config) *flag.FlagSet { flags.StringVar(&cfg.Pipelines.Path, "pipelines.path", cfg.Pipelines.Path, "path to the directory that has the yaml pipeline configuration files, or a single pipeline configuration file") flags.BoolVar(&cfg.Pipelines.ExitOnError, "pipelines.exit-on-error", cfg.Pipelines.ExitOnError, "exit Conduit if a pipeline experiences an error while running") + flags.StringVar(&cfg.SchemaRegistry.Type, "schema-registry.type", cfg.SchemaRegistry.Type, "schema registry type; accepts builtin,confluent") + flags.StringVar(&cfg.SchemaRegistry.Confluent.ConnectionString, "schema-registry.confluent.connection-string", cfg.SchemaRegistry.Confluent.ConnectionString, "confluent schema registry connection string") + // NB: flags with prefix dev.* are hidden from help output by default, they only show up using '-dev -help' showDevHelp := flags.Bool("dev", false, "used together with the dev flag it shows dev flags") flags.StringVar(&cfg.dev.cpuprofile, "dev.cpuprofile", "", "write cpu profile to file") diff --git a/pkg/conduit/runtime.go b/pkg/conduit/runtime.go index 973cd0735..3839d6ccd 100644 --- a/pkg/conduit/runtime.go +++ b/pkg/conduit/runtime.go @@ -28,6 +28,9 @@ import ( "strings" "time" + pconduitserver "github.com/conduitio/conduit-connector-protocol/pconduit/v1/server" + conduitv1 "github.com/conduitio/conduit-connector-protocol/proto/conduit/v1" + conduitschemaregistry "github.com/conduitio/conduit-schema-registry" "github.com/conduitio/conduit/pkg/connector" "github.com/conduitio/conduit/pkg/foundation/cerrors" 
"github.com/conduitio/conduit/pkg/foundation/ctxutil" @@ -45,12 +48,14 @@ import ( "github.com/conduitio/conduit/pkg/pipeline" conn_plugin "github.com/conduitio/conduit/pkg/plugin/connector" conn_builtin "github.com/conduitio/conduit/pkg/plugin/connector/builtin" + "github.com/conduitio/conduit/pkg/plugin/connector/connutils" conn_standalone "github.com/conduitio/conduit/pkg/plugin/connector/standalone" proc_plugin "github.com/conduitio/conduit/pkg/plugin/processor" proc_builtin "github.com/conduitio/conduit/pkg/plugin/processor/builtin" proc_standalone "github.com/conduitio/conduit/pkg/plugin/processor/standalone" "github.com/conduitio/conduit/pkg/processor" "github.com/conduitio/conduit/pkg/provisioning" + "github.com/conduitio/conduit/pkg/schemaregistry" "github.com/conduitio/conduit/pkg/web/api" "github.com/conduitio/conduit/pkg/web/openapi" "github.com/conduitio/conduit/pkg/web/ui" @@ -60,11 +65,11 @@ import ( promclient "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/zerolog" + "github.com/twmb/franz-go/pkg/sr" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/reflection" - "google.golang.org/grpc/stats" "gopkg.in/tomb.v2" ) @@ -89,8 +94,13 @@ type Runtime struct { connectorPluginService *conn_plugin.PluginService processorPluginService *proc_plugin.PluginService + schemaRegistry schemaregistry.Registry + connSchemaService *connutils.SchemaService + connectorPersister *connector.Persister - logger log.CtxLogger + + logger log.CtxLogger + gRPCStatsHandler *promgrpc.StatsHandler } // NewRuntime sets up a Runtime instance and primes it for start. 
@@ -133,35 +143,85 @@ func NewRuntime(cfg Config) (*Runtime, error) { connector.DefaultPersisterBundleCountThreshold, ) - // Create all necessary internal services - plService, connService, procService, connPluginService, procPluginService, err := newServices(logger, db, connectorPersister, cfg) + r := &Runtime{ + Config: cfg, + DB: db, + Ready: make(chan struct{}), + + connectorPersister: connectorPersister, + + gRPCStatsHandler: newGRPCStatsHandler(), + logger: logger, + } + + err := initServices(r) if err != nil { - return nil, cerrors.Errorf("failed to create services: %w", err) + return nil, cerrors.Errorf("failed to initialize services: %w", err) } - provisionService := provisioning.NewService(db, logger, plService, connService, procService, connPluginService, cfg.Pipelines.Path) + return r, nil +} - orc := orchestrator.NewOrchestrator(db, logger, plService, connService, procService, connPluginService, procPluginService) +// Create all necessary internal services +func initServices(r *Runtime) error { + standaloneReg, err := proc_standalone.NewRegistry(r.logger, r.Config.Processors.Path) + if err != nil { + return cerrors.Errorf("failed creating processor registry: %w", err) + } - r := &Runtime{ - Config: cfg, - DB: db, - Orchestrator: orc, - ProvisionService: provisionService, - Ready: make(chan struct{}), + procPluginService := proc_plugin.NewPluginService( + r.logger, + proc_builtin.NewRegistry(r.logger, proc_builtin.DefaultBuiltinProcessors), + standaloneReg, + ) + + var schemaRegistry schemaregistry.Registry + switch r.Config.SchemaRegistry.Type { + case SchemaRegistryTypeConfluent: + schemaRegistry, err = schemaregistry.NewClient(r.logger, sr.URLs(r.Config.SchemaRegistry.Confluent.ConnectionString)) + if err != nil { + return cerrors.Errorf("failed to create schema registry client: %w", err) + } + case SchemaRegistryTypeBuiltin: + schemaRegistry = conduitschemaregistry.NewSchemaRegistry() + default: + // shouldn't happen, we validate the config + return 
cerrors.Errorf("invalid schema registry type %q", r.Config.SchemaRegistry.Type) + } - pipelineService: plService, - connectorService: connService, - processorService: procService, + connSchemaService := connutils.NewSchemaService(r.logger, schemaRegistry) + connPluginService := conn_plugin.NewPluginService( + r.logger, + conn_builtin.NewRegistry(r.logger, r.Config.ConnectorPlugins, connSchemaService), + conn_standalone.NewRegistry(r.logger, r.Config.Connectors.Path), + ) - connectorPluginService: connPluginService, - processorPluginService: procPluginService, + plService := pipeline.NewService(r.logger, r.DB) + connService := connector.NewService(r.logger, r.DB, r.connectorPersister) + procService := processor.NewService(r.logger, r.DB, procPluginService) - connectorPersister: connectorPersister, + provisionService := provisioning.NewService(r.DB, r.logger, plService, connService, procService, connPluginService, r.Config.Pipelines.Path) - logger: logger, - } - return r, nil + orc := orchestrator.NewOrchestrator(r.DB, r.logger, plService, connService, procService, connPluginService, procPluginService) + + r.Orchestrator = orc + r.ProvisionService = provisionService + r.pipelineService = plService + r.connectorService = connService + r.processorService = procService + r.connectorPluginService = connPluginService + r.processorPluginService = procPluginService + r.schemaRegistry = schemaRegistry + r.connSchemaService = connSchemaService + + return nil +} + +func newGRPCStatsHandler() *promgrpc.StatsHandler { + h := promgrpc.ServerStatsHandler() + promclient.MustRegister(h) + + return h } func newLogger(level string, format string) log.CtxLogger { @@ -183,36 +243,6 @@ func configurePrometheus() { metrics.Register(registry) } -func newServices( - logger log.CtxLogger, - db database.DB, - connPersister *connector.Persister, - cfg Config, -) (*pipeline.Service, *connector.Service, *processor.Service, *conn_plugin.PluginService, *proc_plugin.PluginService, error) { - 
standaloneReg, err := proc_standalone.NewRegistry(logger, cfg.Processors.Path) - if err != nil { - return nil, nil, nil, nil, nil, cerrors.Errorf("failed creating processor registry: %w", err) - } - - procPluginService := proc_plugin.NewPluginService( - logger, - proc_builtin.NewRegistry(logger, proc_builtin.DefaultBuiltinProcessors), - standaloneReg, - ) - - connPluginService := conn_plugin.NewPluginService( - logger, - conn_builtin.NewRegistry(logger, cfg.PluginDispenserFactories), - conn_standalone.NewRegistry(logger, cfg.Connectors.Path), - ) - - pipelineService := pipeline.NewService(logger, db) - connectorService := connector.NewService(logger, db, connPersister) - processorService := processor.NewService(logger, db, procPluginService) - - return pipelineService, connectorService, processorService, connPluginService, procPluginService, nil -} - // Run initializes all of Conduit's underlying services and starts the GRPC and // HTTP APIs. This function blocks until the supplied context is cancelled or // one of the services experiences a fatal error. @@ -285,6 +315,13 @@ func (r *Runtime) Run(ctx context.Context) (err error) { }) } + // APIs needed by connector plugins + _, err = r.startConnectorUtils(ctx, t) + if err != nil { + return cerrors.Errorf("failed to start connector utilities: %w", err) + } + + // Public gRPC and HTTP API if r.Config.API.Enabled { // Serve grpc and http API grpcAddr, err := r.serveGRPCAPI(ctx, t) @@ -402,21 +439,6 @@ func (r *Runtime) registerCleanup(t *tomb.Tomb) { }) } -func (r *Runtime) newGrpcStatsHandler() stats.Handler { - // We are manually creating the stats handler and not using - // promgrpc.ServerStatsHandler(), because we don't need metrics related to - // messages. They would be relevant for GRPC streams, we don't use them. 
- grpcStatsHandler := promgrpc.NewStatsHandler( - promgrpc.NewServerConnectionsStatsHandler(promgrpc.NewServerConnectionsGaugeVec()), - promgrpc.NewServerRequestsTotalStatsHandler(promgrpc.NewServerRequestsTotalCounterVec()), - promgrpc.NewServerRequestsInFlightStatsHandler(promgrpc.NewServerRequestsInFlightGaugeVec()), - promgrpc.NewServerRequestDurationStatsHandler(promgrpc.NewServerRequestDurationHistogramVec()), - promgrpc.NewServerResponsesTotalStatsHandler(promgrpc.NewServerResponsesTotalCounterVec()), - ) - promclient.MustRegister(grpcStatsHandler) - return grpcStatsHandler -} - func (r *Runtime) newHTTPMetricsHandler() http.Handler { return promhttp.Handler() } @@ -427,7 +449,7 @@ func (r *Runtime) serveGRPCAPI(ctx context.Context, t *tomb.Tomb) (net.Addr, err grpcutil.RequestIDUnaryServerInterceptor(r.logger), grpcutil.LoggerUnaryServerInterceptor(r.logger), ), - grpc.StatsHandler(r.newGrpcStatsHandler()), + grpc.StatsHandler(r.gRPCStatsHandler), ) pipelineAPIv1 := api.NewPipelineAPIv1(r.Orchestrator.Pipelines) @@ -463,15 +485,58 @@ func (r *Runtime) serveGRPCAPI(ctx context.Context, t *tomb.Tomb) (net.Addr, err grpc_health_v1.RegisterHealthServer(grpcServer, healthServer) // serve grpc server - return r.serveGRPC(ctx, t, grpcServer) + addr, err := r.serveGRPC(ctx, t, grpcServer, r.Config.API.GRPC.Address) + if err != nil { + return nil, err + } + + r.logger.Info(ctx).Str(log.ServerAddressField, addr.String()).Msg("grpc API started") + return addr, nil +} + +// startConnectorUtils starts all the utility services needed by connectors. 
+func (r *Runtime) startConnectorUtils(ctx context.Context, t *tomb.Tomb) (net.Addr, error) { + grpcServer := grpc.NewServer( + grpc.ChainUnaryInterceptor( + grpcutil.RequestIDUnaryServerInterceptor(r.logger), + grpcutil.LoggerUnaryServerInterceptor(r.logger), + ), + grpc.StatsHandler(r.gRPCStatsHandler), + ) + + schemaServiceAPI := pconduitserver.NewSchemaServiceServer(r.connSchemaService) + conduitv1.RegisterSchemaServiceServer(grpcServer, schemaServiceAPI) + + // Makes it easier to use command line tools to interact + // with the gRPC API. + // https://github.com/grpc/grpc/blob/master/doc/server-reflection.md + reflection.Register(grpcServer) + + // Names taken from schema.proto + healthServer := api.NewHealthServer( + map[string]api.Checker{ + "SchemaService": r.connSchemaService, + }, + r.logger, + ) + grpc_health_v1.RegisterHealthServer(grpcServer, healthServer) + + // Serve utilities on a random port + addr, err := r.serveGRPC(ctx, t, grpcServer, ":0") + if err != nil { + return nil, err + } + + r.logger.Info(ctx).Str(log.ServerAddressField, addr.String()).Msg("connector utilities started") + return addr, nil } func (r *Runtime) serveHTTPAPI( ctx context.Context, t *tomb.Tomb, - addr net.Addr, + grpcAddr net.Addr, ) (net.Addr, error) { - conn, err := grpc.NewClient(addr.String(), grpc.WithTransportCredentials(insecure.NewCredentials())) + conn, err := grpc.NewClient(grpcAddr.String(), grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { return nil, cerrors.Errorf("failed to dial server: %w", err) } @@ -579,7 +644,7 @@ func (r *Runtime) serveHTTPAPI( r.logger, ) - return r.serveHTTP( + addr, err := r.serveHTTP( ctx, t, &http.Server{ @@ -588,6 +653,12 @@ func (r *Runtime) serveHTTPAPI( ReadHeaderTimeout: 10 * time.Second, }, ) + if err != nil { + return nil, err + } + + r.logger.Info(ctx).Str(log.ServerAddressField, addr.String()).Msg("http API started") + return addr, nil } func preflightHandler(w http.ResponseWriter) { @@ -616,10 +687,11 
@@ func (r *Runtime) serveGRPC( ctx context.Context, t *tomb.Tomb, srv *grpc.Server, + address string, ) (net.Addr, error) { - ln, err := net.Listen("tcp", r.Config.API.GRPC.Address) + ln, err := net.Listen("tcp", address) if err != nil { - return nil, cerrors.Errorf("failed to listen on address %q: %w", r.Config.API.GRPC.Address, err) + return nil, cerrors.Errorf("failed to listen on address %q: %w", address, err) } t.Go(func() error { @@ -641,7 +713,6 @@ func (r *Runtime) serveGRPC( } }) - r.logger.Info(ctx).Str(log.ServerAddressField, ln.Addr().String()).Msg("grpc server started") return ln.Addr(), nil } @@ -674,6 +745,5 @@ func (r *Runtime) serveHTTP( return srv.Shutdown(ctx) }) - r.logger.Info(ctx).Str(log.ServerAddressField, ln.Addr().String()).Msg("http server started") return ln.Addr(), nil } diff --git a/pkg/conduit/runtime_test.go b/pkg/conduit/runtime_test.go index 235984fd9..a2ed085fb 100644 --- a/pkg/conduit/runtime_test.go +++ b/pkg/conduit/runtime_test.go @@ -28,15 +28,10 @@ import ( func TestRuntime(t *testing.T) { is := is.New(t) - var cfg conduit.Config - cfg.DB.Type = "badger" + cfg := conduit.DefaultConfig() cfg.DB.Badger.Path = t.TempDir() + "/testing.app.db" cfg.API.GRPC.Address = ":0" cfg.API.HTTP.Address = ":0" - cfg.Log.Level = "info" - cfg.Log.Format = "cli" - cfg.Pipelines.Path = "./pipelines" - r, err := conduit.NewRuntime(cfg) is.NoErr(err) is.True(r != nil) @@ -52,8 +47,10 @@ func TestRuntime(t *testing.T) { go func() { errC <- r.Run(ctx) }() - err, got, recvErr := cchan.ChanOut[error](errC).RecvTimeout(context.Background(), time.Second) + err, got, recvErr := cchan.ChanOut[error](errC).RecvTimeout(context.Background(), 100*time.Second) is.NoErr(recvErr) is.True(got) - is.True(cerrors.Is(err, context.Canceled)) // expected error to be context.Cancelled + if !cerrors.Is(err, context.Canceled) { + t.Logf("expected error '%v', got '%v'", context.Canceled, err) + } } diff --git a/pkg/orchestrator/orchestrator_test.go 
b/pkg/orchestrator/orchestrator_test.go index a2ef83bc3..b07cf09d9 100644 --- a/pkg/orchestrator/orchestrator_test.go +++ b/pkg/orchestrator/orchestrator_test.go @@ -22,6 +22,7 @@ import ( "testing" "time" + schemaregistry "github.com/conduitio/conduit-schema-registry" "github.com/conduitio/conduit/pkg/connector" "github.com/conduitio/conduit/pkg/foundation/ctxutil" "github.com/conduitio/conduit/pkg/foundation/database/badger" @@ -30,6 +31,7 @@ import ( "github.com/conduitio/conduit/pkg/pipeline" conn_plugin "github.com/conduitio/conduit/pkg/plugin/connector" conn_builtin "github.com/conduitio/conduit/pkg/plugin/connector/builtin" + "github.com/conduitio/conduit/pkg/plugin/connector/connutils" conn_standalone "github.com/conduitio/conduit/pkg/plugin/connector/standalone" proc_plugin "github.com/conduitio/conduit/pkg/plugin/processor" proc_builtin "github.com/conduitio/conduit/pkg/plugin/processor/builtin" @@ -69,9 +71,10 @@ func TestPipelineSimple(t *testing.T) { is.NoErr(err) }) + schemaService := connutils.NewSchemaService(logger, schemaregistry.NewSchemaRegistry()) connPluginService := conn_plugin.NewPluginService( logger, - conn_builtin.NewRegistry(logger, conn_builtin.DefaultDispenserFactories), + conn_builtin.NewRegistry(logger, conn_builtin.DefaultBuiltinConnectors, schemaService), conn_standalone.NewRegistry(logger, ""), ) diff --git a/pkg/plugin/connector/builtin/registry.go b/pkg/plugin/connector/builtin/registry.go index 129ee3c57..13584bcfa 100644 --- a/pkg/plugin/connector/builtin/registry.go +++ b/pkg/plugin/connector/builtin/registry.go @@ -26,24 +26,25 @@ import ( "github.com/conduitio/conduit-connector-protocol/pconnector" s3 "github.com/conduitio/conduit-connector-s3" sdk "github.com/conduitio/conduit-connector-sdk" + "github.com/conduitio/conduit-connector-sdk/schema" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/log" "github.com/conduitio/conduit/pkg/plugin" 
"github.com/conduitio/conduit/pkg/plugin/connector" + "github.com/conduitio/conduit/pkg/plugin/connector/connutils" ) var ( - // DefaultDispenserFactories contains default dispenser factories for - // built-in plugins. The key of the map is the import path of the module + // DefaultBuiltinConnectors contains the default built-in connectors. + // The key of the map is the import path of the module // containing the connector implementation. - DefaultDispenserFactories = map[string]DispenserFactory{ - // TODO update connectors to latest SDK and uncomment - "github.com/conduitio/conduit-connector-file": NewDispenserFactory(file.Connector), - "github.com/conduitio/conduit-connector-generator": NewDispenserFactory(generator.Connector), - "github.com/conduitio/conduit-connector-kafka": NewDispenserFactory(kafka.Connector), - "github.com/conduitio/conduit-connector-log": NewDispenserFactory(connLog.Connector), - "github.com/conduitio/conduit-connector-postgres": NewDispenserFactory(postgres.Connector), - "github.com/conduitio/conduit-connector-s3": NewDispenserFactory(s3.Connector), + DefaultBuiltinConnectors = map[string]sdk.Connector{ + "github.com/conduitio/conduit-connector-file": file.Connector, + "github.com/conduitio/conduit-connector-generator": generator.Connector, + "github.com/conduitio/conduit-connector-kafka": kafka.Connector, + "github.com/conduitio/conduit-connector-log": connLog.Connector, + "github.com/conduitio/conduit-connector-postgres": postgres.Connector, + "github.com/conduitio/conduit-connector-s3": s3.Connector, } ) @@ -58,12 +59,12 @@ type Registry struct { type blueprint struct { fullName plugin.FullName specification pconnector.Specification - dispenserFactory DispenserFactory + dispenserFactory dispenserFactory } -type DispenserFactory func(name plugin.FullName, logger log.CtxLogger) connector.Dispenser +type dispenserFactory func(name plugin.FullName, logger log.CtxLogger) connector.Dispenser -func NewDispenserFactory(conn sdk.Connector) 
DispenserFactory { +func newDispenserFactory(conn sdk.Connector) dispenserFactory { if conn.NewSource == nil { conn.NewSource = func() sdk.Source { return nil } } @@ -84,7 +85,7 @@ func NewDispenserFactory(conn sdk.Connector) DispenserFactory { } } -func NewRegistry(logger log.CtxLogger, factories map[string]DispenserFactory) *Registry { +func NewRegistry(logger log.CtxLogger, connectors map[string]sdk.Connector, service *connutils.SchemaService) *Registry { logger = logger.WithComponentFromType(Registry{}) buildInfo, ok := debug.ReadBuildInfo() if !ok { @@ -93,17 +94,22 @@ func NewRegistry(logger log.CtxLogger, factories map[string]DispenserFactory) *R buildInfo = &debug.BuildInfo{} // prevent nil pointer exceptions } + // The built-in plugins use Conduit's own schema service + schema.Service = service + r := &Registry{ - plugins: loadPlugins(buildInfo, factories), + plugins: loadPlugins(buildInfo, connectors), logger: logger, } - logger.Info(context.Background()).Int("count", len(r.List())).Msg("builtin plugins initialized") + logger.Info(context.Background()).Int("count", len(r.List())).Msg("builtin connector plugins initialized") return r } -func loadPlugins(buildInfo *debug.BuildInfo, factories map[string]DispenserFactory) map[string]map[string]blueprint { - plugins := make(map[string]map[string]blueprint, len(factories)) - for moduleName, factory := range factories { +func loadPlugins(buildInfo *debug.BuildInfo, connectors map[string]sdk.Connector) map[string]map[string]blueprint { + plugins := make(map[string]map[string]blueprint, len(connectors)) + for moduleName, conn := range connectors { + factory := newDispenserFactory(conn) + specs, err := getSpecification(moduleName, factory, buildInfo) if err != nil { // stop initialization if a built-in plugin is misbehaving @@ -136,7 +142,7 @@ func loadPlugins(buildInfo *debug.BuildInfo, factories map[string]DispenserFacto return plugins } -func getSpecification(moduleName string, factory DispenserFactory, 
buildInfo *debug.BuildInfo) (pconnector.Specification, error) { +func getSpecification(moduleName string, factory dispenserFactory, buildInfo *debug.BuildInfo) (pconnector.Specification, error) { dispenser := factory("", log.CtxLogger{}) specPlugin, err := dispenser.DispenseSpecifier() if err != nil { diff --git a/pkg/plugin/connector/connutils/schema.go b/pkg/plugin/connector/connutils/schema.go new file mode 100644 index 000000000..ea9142a79 --- /dev/null +++ b/pkg/plugin/connector/connutils/schema.go @@ -0,0 +1,96 @@ +// Copyright © 2024 Meroxa, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package connutils + +import ( + "context" + + "github.com/conduitio/conduit-connector-protocol/pconduit" + conduitschemaregistry "github.com/conduitio/conduit-schema-registry" + "github.com/conduitio/conduit/pkg/foundation/cerrors" + "github.com/conduitio/conduit/pkg/foundation/log" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/conduitio/conduit/pkg/schemaregistry/fromschema" + "github.com/conduitio/conduit/pkg/schemaregistry/toschema" + "github.com/twmb/franz-go/pkg/sr" +) + +type SchemaService struct { + registry schemaregistry.Registry + logger log.CtxLogger +} + +var _ pconduit.SchemaService = (*SchemaService)(nil) + +func NewSchemaService(logger log.CtxLogger, registry schemaregistry.Registry) *SchemaService { + return &SchemaService{ + registry: registry, + logger: logger.WithComponent("connutils.SchemaService"), + } +} + +func (s *SchemaService) Check(ctx context.Context) error { + r, ok := s.registry.(schemaregistry.RegistryWithCheck) + if !ok { + return nil + } + return r.Check(ctx) +} + +func (s *SchemaService) CreateSchema(ctx context.Context, req pconduit.CreateSchemaRequest) (pconduit.CreateSchemaResponse, error) { + ss, err := s.registry.CreateSchema(ctx, req.Subject, sr.Schema{ + Schema: string(req.Bytes), + Type: fromschema.SrSchemaType(req.Type), + }) + if err != nil { + var respErr *sr.ResponseError + if cerrors.As(err, &respErr) { + return pconduit.CreateSchemaResponse{}, unwrapSrError(respErr) // don't wrap response errors + } + return pconduit.CreateSchemaResponse{}, cerrors.Errorf("failed to create schema: %w", err) + } + return pconduit.CreateSchemaResponse{ + Schema: toschema.SrSubjectSchema(ss), + }, nil +} + +func (s *SchemaService) GetSchema(ctx context.Context, req pconduit.GetSchemaRequest) (pconduit.GetSchemaResponse, error) { + ss, err := s.registry.SchemaBySubjectVersion(ctx, req.Subject, req.Version) + if err != nil { + var respErr *sr.ResponseError + if cerrors.As(err, &respErr) { + return 
pconduit.GetSchemaResponse{}, unwrapSrError(respErr) // don't wrap response errors + } + return pconduit.GetSchemaResponse{}, cerrors.Errorf("failed to get schema by subject and version: %w", err) + } + + return pconduit.GetSchemaResponse{ + Schema: toschema.SrSubjectSchema(ss), + }, nil +} + +func unwrapSrError(e *sr.ResponseError) error { + switch e.ErrorCode { + case conduitschemaregistry.ErrorCodeSubjectNotFound, + conduitschemaregistry.ErrorCodeVersionNotFound, + conduitschemaregistry.ErrorCodeSchemaNotFound: + return pconduit.ErrSchemaNotFound + case conduitschemaregistry.ErrorCodeInvalidSchema: + return pconduit.ErrInvalidSchemaBytes // TODO change to ErrInvalidSchema + default: + // unknown error, don't unwrap + return e + } +} diff --git a/pkg/plugin/processor/builtin/impl/avro/config.go b/pkg/plugin/processor/builtin/impl/avro/config.go index 3d1e5c697..f10ec8713 100644 --- a/pkg/plugin/processor/builtin/impl/avro/config.go +++ b/pkg/plugin/processor/builtin/impl/avro/config.go @@ -21,8 +21,7 @@ import ( "os" "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/internal" ) type preRegisteredConfig struct { @@ -49,7 +48,7 @@ type schemaConfig struct { // The subject name under which the inferred schema will be registered in the schema registry. 
AutoRegisteredSubject string `json:"autoRegister.subject"` - strategy schemaregistry.SchemaStrategy + strategy internal.SchemaStrategy } func (c *schemaConfig) parse() error { @@ -72,7 +71,7 @@ func (c *schemaConfig) parsePreRegistered() error { return cerrors.Errorf("version needs to be positive: %v", c.PreRegistered.Version) } - c.strategy = schemaregistry.DownloadSchemaStrategy{ + c.strategy = internal.DownloadSchemaStrategy{ Subject: c.PreRegistered.Subject, Version: c.PreRegistered.Version, } @@ -84,8 +83,7 @@ func (c *schemaConfig) parseAutoRegister() error { return cerrors.New("subject required for schema strategy 'autoRegister'") } - c.strategy = schemaregistry.ExtractAndUploadSchemaStrategy{ - Type: sr.TypeAvro, + c.strategy = internal.ExtractAndUploadSchemaStrategy{ Subject: c.AutoRegisteredSubject, } return nil diff --git a/pkg/plugin/processor/builtin/impl/avro/decode.go b/pkg/plugin/processor/builtin/impl/avro/decode.go index 038b1bc11..c748890e5 100644 --- a/pkg/plugin/processor/builtin/impl/avro/decode.go +++ b/pkg/plugin/processor/builtin/impl/avro/decode.go @@ -25,8 +25,9 @@ import ( sdk "github.com/conduitio/conduit-processor-sdk" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/internal" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/twmb/franz-go/pkg/sr" ) type decoder interface { @@ -75,8 +76,8 @@ func parseDecodeConfig(ctx context.Context, m map[string]string) (decodeConfig, return cfg, nil } -func (c decodeConfig) ClientOptions() []sr.Opt { - clientOpts := []sr.Opt{sr.URLs(c.URL), sr.Normalize()} +func (c decodeConfig) ClientOptions() []sr.ClientOpt { + clientOpts := []sr.ClientOpt{sr.URLs(c.URL)} if c.Auth.Username != "" && c.Auth.Password != "" { clientOpts = 
append(clientOpts, sr.BasicAuth(c.Auth.Username, c.Auth.Password)) } @@ -136,12 +137,13 @@ func (p *decodeProcessor) Configure(ctx context.Context, m map[string]string) er return nil } -func (p *decodeProcessor) Open(ctx context.Context) error { +func (p *decodeProcessor) Open(context.Context) error { + // TODO: somehow inject the schemaregistry.Registry that Conduit created. client, err := schemaregistry.NewClient(p.logger, p.cfg.ClientOptions()...) if err != nil { return cerrors.Errorf("could not create schema registry client: %w", err) } - p.decoder = schemaregistry.NewDecoder(client, p.logger, &sr.Serde{}) + p.decoder = internal.NewDecoder(client, p.logger) return nil } diff --git a/pkg/plugin/processor/builtin/impl/avro/decode_examples_test.go b/pkg/plugin/processor/builtin/impl/avro/decode_examples_test.go index 74658a335..34f7a0bcf 100644 --- a/pkg/plugin/processor/builtin/impl/avro/decode_examples_test.go +++ b/pkg/plugin/processor/builtin/impl/avro/decode_examples_test.go @@ -23,14 +23,15 @@ import ( "github.com/conduitio/conduit-commons/opencdc" sdk "github.com/conduitio/conduit-processor-sdk" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry" "github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/exampleutil" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/conduitio/conduit/pkg/schemaregistry/schemaregistrytest" + "github.com/twmb/franz-go/pkg/sr" ) //nolint:govet // a more descriptive example description func ExampleDecodeProcessor() { - url, cleanup := schemaregistry.ExampleSchemaRegistryURL("ExampleDecodeProcessor", 54322) + url, cleanup := schemaregistrytest.ExampleSchemaRegistryURL("ExampleDecodeProcessor", 54322) defer cleanup() client, err := schemaregistry.NewClient(log.Nop(), sr.URLs(url)) diff --git a/pkg/plugin/processor/builtin/impl/avro/decode_paramgen.go 
b/pkg/plugin/processor/builtin/impl/avro/decode_paramgen.go index abe265bbd..ac8338521 100644 --- a/pkg/plugin/processor/builtin/impl/avro/decode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/avro/decode_paramgen.go @@ -7,45 +7,55 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + decodeConfigAuthBasicPassword = "auth.basic.password" + decodeConfigAuthBasicUsername = "auth.basic.username" + decodeConfigField = "field" + decodeConfigTlsCaCert = "tls.ca.cert" + decodeConfigTlsClientCert = "tls.client.cert" + decodeConfigTlsClientKey = "tls.client.key" + decodeConfigUrl = "url" +) + func (decodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "auth.basic.password": { + decodeConfigAuthBasicPassword: { Default: "", Description: "The password to use with basic authentication. This option is required if\nauth.basic.username contains a value. If both auth.basic.username and auth.basic.password\nare empty basic authentication is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "auth.basic.username": { + decodeConfigAuthBasicUsername: { Default: "", Description: "The username to use with basic authentication. This option is required if\nauth.basic.password contains a value. If both auth.basic.username and auth.basic.password\nare empty basic authentication is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "field": { + decodeConfigField: { Default: ".Payload.After", Description: "The field that will be decoded.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "tls.ca.cert": { + decodeConfigTlsCaCert: { Default: "", Description: "The path to a file containing PEM encoded CA certificates. 
If this option is empty,\nConduit falls back to using the host's root CA set.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "tls.client.cert": { + decodeConfigTlsClientCert: { Default: "", Description: "The path to a file containing a PEM encoded certificate. This option is required\nif tls.client.key contains a value. If both tls.client.cert and tls.client.key are empty\nTLS is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "tls.client.key": { + decodeConfigTlsClientKey: { Default: "", Description: "The path to a file containing a PEM encoded private key. This option is required\nif tls.client.cert contains a value. If both tls.client.cert and tls.client.key are empty\nTLS is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "url": { + decodeConfigUrl: { Default: "", Description: "URL of the schema registry (e.g. http://localhost:8085)", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/avro/encode.go b/pkg/plugin/processor/builtin/impl/avro/encode.go index 28a52d696..1639459ac 100644 --- a/pkg/plugin/processor/builtin/impl/avro/encode.go +++ b/pkg/plugin/processor/builtin/impl/avro/encode.go @@ -25,9 +25,10 @@ import ( sdk "github.com/conduitio/conduit-processor-sdk" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry" + "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/internal" + "github.com/conduitio/conduit/pkg/schemaregistry" "github.com/goccy/go-json" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/twmb/franz-go/pkg/sr" ) type encoder interface { @@ -50,8 +51,8 @@ type encodeConfig struct { fieldResolver sdk.ReferenceResolver } -func (c encodeConfig) ClientOptions() []sr.Opt { - clientOpts := []sr.Opt{sr.URLs(c.URL), sr.Normalize()} +func (c encodeConfig) 
ClientOptions() []sr.ClientOpt { + clientOpts := []sr.ClientOpt{sr.URLs(c.URL)} if c.Auth.Username != "" && c.Auth.Password != "" { clientOpts = append(clientOpts, sr.BasicAuth(c.Auth.Username, c.Auth.Password)) } @@ -157,11 +158,12 @@ func (p *encodeProcessor) Configure(ctx context.Context, m map[string]string) er } func (p *encodeProcessor) Open(context.Context) error { + // TODO: somehow inject the schemaregistry.Registry that Conduit created. client, err := schemaregistry.NewClient(p.logger, p.cfg.ClientOptions()...) if err != nil { return cerrors.Errorf("could not create schema registry client: %w", err) } - p.encoder = schemaregistry.NewEncoder(client, p.logger, &sr.Serde{}, p.cfg.Schema.strategy) + p.encoder = internal.NewEncoder(client, p.logger, p.cfg.Schema.strategy) return nil } diff --git a/pkg/plugin/processor/builtin/impl/avro/encode_examples_test.go b/pkg/plugin/processor/builtin/impl/avro/encode_examples_test.go index 1dabaaf8e..ddfb87c2a 100644 --- a/pkg/plugin/processor/builtin/impl/avro/encode_examples_test.go +++ b/pkg/plugin/processor/builtin/impl/avro/encode_examples_test.go @@ -23,14 +23,15 @@ import ( "github.com/conduitio/conduit-commons/opencdc" sdk "github.com/conduitio/conduit-processor-sdk" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry" "github.com/conduitio/conduit/pkg/plugin/processor/builtin/internal/exampleutil" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/conduitio/conduit/pkg/schemaregistry/schemaregistrytest" + "github.com/twmb/franz-go/pkg/sr" ) //nolint:govet // a more descriptive example description func ExampleEncodeProcessor_autoRegister() { - url, cleanup := schemaregistry.ExampleSchemaRegistryURL("ExampleEncodeProcessor_autoRegister", 54322) + url, cleanup := schemaregistrytest.ExampleSchemaRegistryURL("ExampleEncodeProcessor_autoRegister", 54322) defer cleanup() p := 
NewEncodeProcessor(log.Nop()) @@ -109,7 +110,7 @@ and registered on the fly under the subject ` + "`example-autoRegister`" + `.`, //nolint:govet // a more descriptive example description func ExampleEncodeProcessor_preRegistered() { - url, cleanup := schemaregistry.ExampleSchemaRegistryURL("ExampleEncodeProcessor_preRegistered", 54322) + url, cleanup := schemaregistrytest.ExampleSchemaRegistryURL("ExampleEncodeProcessor_preRegistered", 54322) defer cleanup() client, err := schemaregistry.NewClient(log.Nop(), sr.URLs(url)) diff --git a/pkg/plugin/processor/builtin/impl/avro/encode_paramgen.go b/pkg/plugin/processor/builtin/impl/avro/encode_paramgen.go index 29f1fd73d..a15e47d19 100644 --- a/pkg/plugin/processor/builtin/impl/avro/encode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/avro/encode_paramgen.go @@ -7,39 +7,53 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + encodeConfigAuthBasicPassword = "auth.basic.password" + encodeConfigAuthBasicUsername = "auth.basic.username" + encodeConfigField = "field" + encodeConfigSchemaAutoRegisterSubject = "schema.autoRegister.subject" + encodeConfigSchemaPreRegisteredSubject = "schema.preRegistered.subject" + encodeConfigSchemaPreRegisteredVersion = "schema.preRegistered.version" + encodeConfigSchemaStrategy = "schema.strategy" + encodeConfigTlsCaCert = "tls.ca.cert" + encodeConfigTlsClientCert = "tls.client.cert" + encodeConfigTlsClientKey = "tls.client.key" + encodeConfigUrl = "url" +) + func (encodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "auth.basic.password": { + encodeConfigAuthBasicPassword: { Default: "", Description: "The password to use with basic authentication. This option is required if\nauth.basic.username contains a value. 
If both auth.basic.username and auth.basic.password\nare empty basic authentication is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "auth.basic.username": { + encodeConfigAuthBasicUsername: { Default: "", Description: "The username to use with basic authentication. This option is required if\nauth.basic.password contains a value. If both auth.basic.username and auth.basic.password\nare empty basic authentication is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "field": { + encodeConfigField: { Default: ".Payload.After", Description: "The field that will be encoded.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "schema.autoRegister.subject": { + encodeConfigSchemaAutoRegisterSubject: { Default: "", Description: "The subject name under which the inferred schema will be registered in the schema registry.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "schema.preRegistered.subject": { + encodeConfigSchemaPreRegisteredSubject: { Default: "", Description: "The subject of the schema in the schema registry used to encode the record.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "schema.preRegistered.version": { + encodeConfigSchemaPreRegisteredVersion: { Default: "", Description: "The version of the schema in the schema registry used to encode the record.", Type: config.ParameterTypeInt, @@ -47,7 +61,7 @@ func (encodeConfig) Parameters() map[string]config.Parameter { config.ValidationGreaterThan{V: 0}, }, }, - "schema.strategy": { + encodeConfigSchemaStrategy: { Default: "", Description: "Strategy to use to determine the schema for the record.\nAvailable strategies are:\n* `preRegistered` (recommended) - Download an existing schema from the schema registry.\n This strategy is 
further configured with options starting with `schema.preRegistered.*`.\n* `autoRegister` (for development purposes) - Infer the schema from the record and register it\n in the schema registry. This strategy is further configured with options starting with\n `schema.autoRegister.*`.\n\nFor more information about the behavior of each strategy read the main processor description.", Type: config.ParameterTypeString, @@ -56,25 +70,25 @@ func (encodeConfig) Parameters() map[string]config.Parameter { config.ValidationInclusion{List: []string{"preRegistered", "autoRegister"}}, }, }, - "tls.ca.cert": { + encodeConfigTlsCaCert: { Default: "", Description: "The path to a file containing PEM encoded CA certificates. If this option is empty,\nConduit falls back to using the host's root CA set.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "tls.client.cert": { + encodeConfigTlsClientCert: { Default: "", Description: "The path to a file containing a PEM encoded certificate. This option is required\nif tls.client.key contains a value. If both tls.client.cert and tls.client.key are empty\nTLS is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "tls.client.key": { + encodeConfigTlsClientKey: { Default: "", Description: "The path to a file containing a PEM encoded private key. This option is required\nif tls.client.cert contains a value. If both tls.client.cert and tls.client.key are empty\nTLS is disabled.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "url": { + encodeConfigUrl: { Default: "", Description: "URL of the schema registry (e.g. 
http://localhost:8085)", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/avro/internal/decoder.go b/pkg/plugin/processor/builtin/impl/avro/internal/decoder.go new file mode 100644 index 000000000..01fd2a278 --- /dev/null +++ b/pkg/plugin/processor/builtin/impl/avro/internal/decoder.go @@ -0,0 +1,59 @@ +// Copyright © 2024 Meroxa, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "context" + + "github.com/conduitio/conduit-commons/opencdc" + "github.com/conduitio/conduit/pkg/foundation/cerrors" + "github.com/conduitio/conduit/pkg/foundation/log" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/conduitio/conduit/pkg/schemaregistry/toschema" + "github.com/twmb/franz-go/pkg/sr" +) + +type Decoder struct { + registry schemaregistry.Registry + logger log.CtxLogger +} + +func NewDecoder(registry schemaregistry.Registry, logger log.CtxLogger) *Decoder { + return &Decoder{ + registry: registry, + logger: logger.WithComponent("avro.internal.Decoder"), + } +} + +func (d *Decoder) Decode(ctx context.Context, b opencdc.RawData) (opencdc.StructuredData, error) { + id, data, err := (&sr.ConfluentHeader{}).DecodeID(b.Bytes()) + if err != nil { + return nil, cerrors.Errorf("failed to decode header: %w", err) + } + + s, err := d.registry.SchemaByID(ctx, id) + if err != nil { + return nil, cerrors.Errorf("failed to get schema: %w", err) + } + + sch := toschema.SrSchema(s) + var out 
opencdc.StructuredData + err = sch.Unmarshal(data, &out) + if err != nil { + return nil, cerrors.Errorf("failed to unmarshal data with schema (ID: %v): %w", id, err) + } + + return out, nil +} diff --git a/pkg/plugin/processor/builtin/impl/avro/internal/encoder.go b/pkg/plugin/processor/builtin/impl/avro/internal/encoder.go new file mode 100644 index 000000000..26922f99c --- /dev/null +++ b/pkg/plugin/processor/builtin/impl/avro/internal/encoder.go @@ -0,0 +1,103 @@ +// Copyright © 2024 Meroxa, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package internal
+
+import (
+	"context"
+
+	"github.com/conduitio/conduit-commons/opencdc"
+	"github.com/conduitio/conduit-commons/schema"
+	"github.com/conduitio/conduit/pkg/foundation/cerrors"
+	"github.com/conduitio/conduit/pkg/foundation/log"
+	"github.com/conduitio/conduit/pkg/schemaregistry"
+	"github.com/conduitio/conduit/pkg/schemaregistry/toschema"
+	"github.com/twmb/franz-go/pkg/sr"
+)
+
+type Encoder struct {
+	registry schemaregistry.Registry
+	logger   log.CtxLogger
+
+	SchemaStrategy
+}
+
+type SchemaStrategy interface {
+	GetSchema(context.Context, schemaregistry.Registry, log.CtxLogger, opencdc.StructuredData) (schema.Schema, error)
+}
+
+func NewEncoder(registry schemaregistry.Registry, logger log.CtxLogger, strategy SchemaStrategy) *Encoder {
+	return &Encoder{
+		registry:       registry,
+		logger:         logger.WithComponent("avro.internal.Encoder"),
+		SchemaStrategy: strategy,
+	}
+}
+
+func (e *Encoder) Encode(ctx context.Context, sd opencdc.StructuredData) (opencdc.RawData, error) {
+	s, err := e.GetSchema(ctx, e.registry, e.logger, sd)
+	if err != nil {
+		return opencdc.RawData{}, cerrors.Errorf("failed to get schema: %w", err)
+	}
+
+	// TODO note that we need to pass in the index when adding support for protobuf
+	header, err := (&sr.ConfluentHeader{}).AppendEncode(nil, s.ID, nil)
+	if err != nil {
+		return nil, cerrors.Errorf("failed to encode header: %w", err)
+	}
+
+	data, err := s.Marshal(sd)
+	if err != nil {
+		return nil, cerrors.Errorf("failed to marshal data with schema (ID: %v, subject: %v, version: %v): %w", s.ID, s.Subject, s.Version, err)
+	}
+
+	return append(header, data...), nil
+}
+
+type ExtractAndUploadSchemaStrategy struct {
+	Subject string
+}
+
+func (str ExtractAndUploadSchemaStrategy) GetSchema(ctx context.Context, registry schemaregistry.Registry, _ log.CtxLogger, sd opencdc.StructuredData) (schema.Schema, error) {
+	s, err := schema.KnownSerdeFactories[schema.TypeAvro].SerdeForType(sd)
+	if err != nil {
+		return schema.Schema{}, cerrors.Errorf("could not extract avro schema: %w", err)
+	}
+
+	ss, err := registry.CreateSchema(ctx, str.Subject, sr.Schema{
+		Schema: s.String(),
+		Type:   sr.TypeAvro,
+	})
+	if err != nil {
+		return schema.Schema{}, cerrors.Errorf("could not create schema: %w", err)
+	}
+
+	return toschema.SrSubjectSchema(ss), nil
+}
+
+type DownloadSchemaStrategy struct {
+	Subject string
+	// TODO add support for specifying "latest" - https://github.com/ConduitIO/conduit/issues/1095
+	Version int
+}
+
+func (str DownloadSchemaStrategy) GetSchema(ctx context.Context, registry schemaregistry.Registry, _ log.CtxLogger, _ opencdc.StructuredData) (schema.Schema, error) {
+	// get schema from registry; Version is an int, so it is formatted with %d (%q would print it as a quoted rune literal)
+	ss, err := registry.SchemaBySubjectVersion(ctx, str.Subject, str.Version)
+	if err != nil {
+		return schema.Schema{}, cerrors.Errorf("could not get schema with subject %q and version %d: %w", str.Subject, str.Version, err)
+	}
+
+	return toschema.SrSubjectSchema(ss), nil
+}
diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder_test.go b/pkg/plugin/processor/builtin/impl/avro/internal/encoder_test.go
similarity index 72%
rename from pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder_test.go
rename to pkg/plugin/processor/builtin/impl/avro/internal/encoder_test.go
index a0cd4dd0a..ef5515486 100644
--- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder_test.go
+++ b/pkg/plugin/processor/builtin/impl/avro/internal/encoder_test.go
@@ -1,4 +1,4 @@
-// Copyright © 2023 Meroxa, Inc.
+// Copyright © 2024 Meroxa, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-package schemaregistry +package internal import ( "context" @@ -20,8 +20,10 @@ import ( "github.com/conduitio/conduit-commons/opencdc" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/schemaregistry" + "github.com/conduitio/conduit/pkg/schemaregistry/schemaregistrytest" "github.com/matryer/is" + "github.com/twmb/franz-go/pkg/sr" ) func TestEncodeDecode_ExtractAndUploadSchemaStrategy(t *testing.T) { @@ -29,8 +31,7 @@ func TestEncodeDecode_ExtractAndUploadSchemaStrategy(t *testing.T) { ctx := context.Background() logger := log.Nop() - var serde sr.Serde - client, err := NewClient(logger, sr.URLs(TestSchemaRegistryURL(t))) + client, err := schemaregistry.NewClient(logger, sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t))) is.NoErr(err) have := opencdc.StructuredData{ @@ -62,24 +63,18 @@ func TestEncodeDecode_ExtractAndUploadSchemaStrategy(t *testing.T) { "mySlice": []any{1, 2, 3}, // slice without type } - for schemaType := range DefaultSchemaFactories { - t.Run(schemaType.String(), func(t *testing.T) { - is := is.New(t) - enc := NewEncoder(client, logger, &serde, ExtractAndUploadSchemaStrategy{ - Type: schemaType, - Subject: "test1" + schemaType.String(), - }) - dec := NewDecoder(client, logger, &serde) + enc := NewEncoder(client, logger, ExtractAndUploadSchemaStrategy{ + Subject: "test1", + }) + dec := NewDecoder(client, logger) - bytes, err := enc.Encode(ctx, have) - is.NoErr(err) + bytes, err := enc.Encode(ctx, have) + is.NoErr(err) - got, err := dec.Decode(ctx, bytes) - is.NoErr(err) + got, err := dec.Decode(ctx, bytes) + is.NoErr(err) - is.Equal(want, got) - }) - } + is.Equal(want, got) } func TestEncodeDecode_DownloadStrategy_Avro(t *testing.T) { @@ -87,8 +82,7 @@ func TestEncodeDecode_DownloadStrategy_Avro(t *testing.T) { ctx := context.Background() logger := log.Nop() - var serde sr.Serde - client, err := NewClient(logger, sr.URLs(TestSchemaRegistryURL(t))) + client, err := 
schemaregistry.NewClient(logger, sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t))) is.NoErr(err) have := opencdc.StructuredData{ @@ -113,11 +107,11 @@ func TestEncodeDecode_DownloadStrategy_Avro(t *testing.T) { }) is.NoErr(err) - enc := NewEncoder(client, logger, &serde, DownloadSchemaStrategy{ + enc := NewEncoder(client, logger, DownloadSchemaStrategy{ Subject: ss.Subject, Version: ss.Version, }) - dec := NewDecoder(client, logger, &serde) + dec := NewDecoder(client, logger) bytes, err := enc.Encode(ctx, have) is.NoErr(err) diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/extractor.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/extractor.go deleted file mode 100644 index 80614efc3..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/extractor.go +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "fmt" - "reflect" - "strings" - "time" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/hamba/avro/v2" -) - -var ( - structuredDataType = reflect.TypeFor[opencdc.StructuredData]() - byteType = reflect.TypeFor[byte]() - timeType = reflect.TypeFor[time.Time]() -) - -// extractor exposes a way to extract an Avro schema from a Go value. 
-type extractor struct{} - -// Extract uses reflection to traverse the value and type of v and extract an -// Avro schema from it. There are some limitations that will cause this function -// to return an error, here are all known cases: -// - A fixed array of a type other than byte (e.g. [4]int). -// - A map with a key type other than string (e.g. map[int]any). -// - We only support built-in Avro types, which means that the following Go -// types are NOT supported: -// uint, uint64, complex64, complex128, chan, func, uintptr -// -// The function does its best to infer the schema, but it's working with limited -// information and has to make some assumptions: -// - If a map does not specify the type of its values (e.g. map[string]any), -// Extract will traverse all values in the map, extract their types and -// combine them in a union type. If the map is empty, the extracted value -// type will default to a nullable string (union type of string and null). -// - If a slice does not specify the type of its values (e.g. []any), Extract -// will traverse all values in the slice, extract their types and combine -// them in a union type. If the slice is empty, the extracted value type -// will default to a nullable string (union type of string and null). -// - If Extract encounters a value with the type of opencdc.StructuredData it -// will treat it as a record and extract a record schema, where each key in -// the structured data is extracted into its own record field. 
-func (e extractor) Extract(v any) (avro.Schema, error) { - return e.extract([]string{"record"}, reflect.ValueOf(v), reflect.TypeOf(v)) -} - -func (e extractor) extract(path []string, v reflect.Value, t reflect.Type) (avro.Schema, error) { - if t == nil { - return nil, cerrors.Errorf("%s: can't get schema for untyped nil", strings.Join(path, ".")) // untyped nil - } - switch t.Kind() { - case reflect.Bool: - return avro.NewPrimitiveSchema(avro.Boolean, nil), nil - case reflect.Int64, reflect.Uint32: - return avro.NewPrimitiveSchema(avro.Long, nil), nil - case reflect.Int, reflect.Int32, reflect.Int16, reflect.Uint16, reflect.Int8, reflect.Uint8: - return avro.NewPrimitiveSchema(avro.Int, nil), nil - case reflect.Float32: - return avro.NewPrimitiveSchema(avro.Float, nil), nil - case reflect.Float64: - return avro.NewPrimitiveSchema(avro.Double, nil), nil - case reflect.String: - return avro.NewPrimitiveSchema(avro.String, nil), nil - case reflect.Pointer: - return e.extractPointer(path, v, t) - case reflect.Interface: - return e.extractInterface(path, v, t) - case reflect.Array: - if t.Elem() != byteType { - return nil, cerrors.Errorf("%s: arrays with value type %v not supported, avro only supports bytes as values", strings.Join(path, "."), t.Elem().String()) - } - return avro.NewFixedSchema(strings.Join(path, "."), "", t.Len(), nil) - case reflect.Slice: - return e.extractSlice(path, v, t) - case reflect.Map: - return e.extractMap(path, v, t) - case reflect.Struct: - if t == timeType { - return avro.NewPrimitiveSchema( - avro.Long, - avro.NewPrimitiveLogicalSchema(avro.TimestampMicros), - ), nil - } - return e.extractStruct(path, v, t) - } - // Invalid, Uintptr, UnsafePointer, Uint64, Uint, Complex64, Complex128, Chan, Func - return nil, cerrors.Errorf("%s: unsupported type: %v", strings.Join(path, "."), t) -} - -// extractPointer extracts the schema behind the pointer and makes it nullable -// (if it's not already nullable). 
-func (e extractor) extractPointer(path []string, v reflect.Value, t reflect.Type) (avro.Schema, error) { - var vElem reflect.Value - if v.IsValid() { - vElem = v.Elem() - } - s, err := e.extract(path, vElem, t.Elem()) - if err != nil { - return nil, err - } - - var schemas avro.Schemas - if us, ok := s.(*avro.UnionSchema); ok && us.Nullable() { - // it's already a nullable schema - return s, nil - } else if ok { - // take types from union schema - schemas = us.Types() - } else if s.Type() != avro.Null { - // non-nil type - schemas = avro.Schemas{s} - } - - s, err = avro.NewUnionSchema(append(schemas, &avro.NullSchema{})) - if err != nil { - return nil, err - } - - return s, nil -} - -// extractInterface ignores the type, since an interface doesn't say anything -// about the concrete type behind it. Instead, it looks at the value behind the -// interface and tries to extract the schema based on its actual type. -// If the value is nil we have no way of knowing the actual type, but since we -// need to be able to encode untyped nil values, we default to a nullable string. -func (e extractor) extractInterface(path []string, v reflect.Value, _ reflect.Type) (avro.Schema, error) { - if !v.IsValid() || v.IsNil() { - // unknown type, fall back to nullable string - return avro.NewUnionSchema([]avro.Schema{ - avro.NewPrimitiveSchema(avro.String, nil), - &avro.NullSchema{}, - }) - } - return e.extract(path, v.Elem(), v.Elem().Type()) -} - -// extractSlice tries to extract the schema based on the slice value type. If -// that type is an interface it falls back to looping through all values, -// extracting their types and combining them into a nullable union schema. 
-func (e extractor) extractSlice(path []string, v reflect.Value, t reflect.Type) (avro.Schema, error) { - if t.Elem().Kind() == reflect.Uint8 { - return avro.NewPrimitiveSchema(avro.Bytes, nil), nil - } - - // try getting value type based on the slice type - if t.Elem().Kind() != reflect.Interface { - vs, err := e.extract(append(path, "item"), reflect.Value{}, t.Elem()) - if err != nil { - return nil, err - } - return avro.NewArraySchema(vs), nil - } - - // this is []any, loop through all values and extracting their types - // into a union schema, null is included by default - types := []avro.Schema{&avro.NullSchema{}} - for i := 0; i < v.Len(); i++ { - itemSchema, err := e.extract( - append(path, fmt.Sprintf("item%d", i)), - v.Index(i), t.Elem(), - ) - if err != nil { - return nil, err - } - types = append(types, itemSchema) - } - // we could have duplicate schemas, deduplicate them - types, err := e.deduplicate(types) - if err != nil { - return nil, err - } - - if v.Len() == 0 { - // it's an empty slice, add string to types to have a valid schema - types = append(types, avro.NewPrimitiveSchema(avro.String, nil)) - } - - itemsSchema, err := avro.NewUnionSchema(types) - if err != nil { - return nil, cerrors.Errorf("%s: %w", strings.Join(path, "."), err) - } - return avro.NewArraySchema(itemsSchema), nil -} - -// extractMap tries to extract the schema based on the map value type. If that -// type is an interface it falls back to looping through all values, extracting -// their types and combining them into a nullable union schema. -// If the key of the map is not a string, this function returns an error. If the -// type of the map is opencdc.StructuredData it will treat it as a record and -// extract a record schema, where each key in the structured data is extracted -// into its own record field. 
-func (e extractor) extractMap(path []string, v reflect.Value, t reflect.Type) (avro.Schema, error) { - if t == structuredDataType { - // special case - we treat StructuredData like a struct - var fields []*avro.Field - valType := t.Elem() - for _, keyValue := range v.MapKeys() { - fs, err := e.extract(append(path, keyValue.String()), v.MapIndex(keyValue), valType) - if err != nil { - return nil, err - } - field, err := avro.NewField(keyValue.String(), fs) - if err != nil { - return nil, err - } - fields = append(fields, field) - } - rs, err := avro.NewRecordSchema(strings.Join(path, "."), "", fields) - if err != nil { - return nil, err - } - return rs, nil - } - if t.Key().Kind() != reflect.String { - return nil, cerrors.Errorf("%s: maps with key type %v not supported, avro only supports strings as keys", strings.Join(path, "."), t.Key().Kind()) - } - // try getting value type based on the map type - if t.Elem().Kind() != reflect.Interface { - vs, err := e.extract(append(path, "value"), reflect.Value{}, t.Elem()) - if err != nil { - return nil, err - } - return avro.NewMapSchema(vs), nil - } - - // this is map[string]any, loop through all values and extracting their - // types into a union schema, null is included by default - types := []avro.Schema{&avro.NullSchema{}} - for _, kv := range v.MapKeys() { - valValue := v.MapIndex(kv) - vs, err := e.extract(append(path, "value"), valValue, t.Elem()) - if err != nil { - return nil, err - } - types = append(types, vs) - } - // we could have duplicate schemas, deduplicate them - types, err := e.deduplicate(types) - if err != nil { - return nil, err - } - - if len(v.MapKeys()) == 0 { - // it's an empty map, add string to types to have a valid schema - types = append(types, avro.NewPrimitiveSchema(avro.String, nil)) - } - vs, err := avro.NewUnionSchema(types) - if err != nil { - return nil, cerrors.Errorf("%s: %w", strings.Join(path, "."), err) - } - return avro.NewMapSchema(vs), nil -} - -// extractStruct traverses the 
struct fields, extracts the schema for each field -// and combines them into a record schema. If the field contains a json tag, -// that tag is used for the extracted name of the field, otherwise it is the -// name of the Go struct field. If the json tag of a field contains "-" (i.e. -// ignored field), then the field is skipped. -func (e extractor) extractStruct(path []string, v reflect.Value, t reflect.Type) (avro.Schema, error) { - var fields []*avro.Field - for i := 0; i < t.NumField(); i++ { - sf := t.Field(i) - name, ok := e.getStructFieldJSONName(sf) - if !ok { - continue // skip this field - } - var vfi reflect.Value - if v.IsValid() { - vfi = v.Field(i) - } - fs, err := e.extract(append(path, name), vfi, t.Field(i).Type) - if err != nil { - return nil, err - } - - field, err := avro.NewField(name, fs) - if err != nil { - return nil, err - } - fields = append(fields, field) - } - rs, err := avro.NewRecordSchema(strings.Join(path, "."), "", fields) - if err != nil { - return nil, err - } - return rs, nil -} - -func (e extractor) deduplicate(schemas []avro.Schema) ([]avro.Schema, error) { - out := make([]avro.Schema, 0, len(schemas)) - typesSet := make(map[[32]byte]struct{}) - - var appendSchema func(schema avro.Schema) error - appendSchema = func(schema avro.Schema) error { - if _, ok := typesSet[schema.Fingerprint()]; ok { - return nil - } - if us, ok := schema.(*avro.UnionSchema); ok { - for _, st := range us.Types() { - if err := appendSchema(st); err != nil { - return err - } - } - return nil - } - for _, s := range out { - if s.Type() == schema.Type() { - switch s := s.(type) { - case *avro.ArraySchema: - // we are combining two array schemas with different item - // schemas, combine them and create a new array schema - schema := schema.(*avro.ArraySchema) - itemsSchema, err := e.deduplicate([]avro.Schema{s.Items(), schema.Items()}) - if err != nil { - return err - } - if len(itemsSchema) == 1 { - *s = *avro.NewArraySchema(itemsSchema[0]) - } else { - 
itemsUnionSchema, err := avro.NewUnionSchema(itemsSchema) - if err != nil { - return err - } - *s = *avro.NewArraySchema(itemsUnionSchema) - } - case *avro.MapSchema: - schema := schema.(*avro.MapSchema) - valuesSchema, err := e.deduplicate([]avro.Schema{s.Values(), schema.Values()}) - if err != nil { - return err - } - if len(valuesSchema) == 1 { - *s = *avro.NewMapSchema(valuesSchema[0]) - } else { - valuesUnionSchema, err := avro.NewUnionSchema(valuesSchema) - if err != nil { - return err - } - *s = *avro.NewMapSchema(valuesUnionSchema) - } - default: - return cerrors.Errorf("can't combine schemas of type %T", s) - } - return nil - } - } - - // schema does not exist yet - out = append(out, schema) - typesSet[schema.Fingerprint()] = struct{}{} - return nil - } - - for _, schema := range schemas { - if err := appendSchema(schema); err != nil { - return nil, err - } - } - return out, nil -} - -func (extractor) getStructFieldJSONName(sf reflect.StructField) (string, bool) { - jsonTag := strings.Split(sf.Tag.Get("json"), ",")[0] // ignore tag options (omitempty) - if jsonTag == "-" { - return "", false - } - if jsonTag != "" { - return jsonTag, true - } - return sf.Name, true -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema.go deleted file mode 100644 index d51a3d3b9..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema.go +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/hamba/avro/v2" - "github.com/lovromazgon/franz-go/pkg/sr" -) - -const Type = sr.TypeAvro - -// Schema represents an Avro schema. It exposes methods for marshaling and -// unmarshaling data. -type Schema struct { - schema avro.Schema - unionResolver UnionResolver -} - -// Marshal returns the Avro encoding of v. Note that this function may mutate v. -// Limitations: -// - Map keys need to be of type string -// - Array values need to be of type uint8 (byte) -func (s *Schema) Marshal(v any) ([]byte, error) { - err := s.unionResolver.BeforeMarshal(v) - if err != nil { - return nil, err - } - bytes, err := avro.Marshal(s.schema, v) - if err != nil { - return nil, err - } - return bytes, nil -} - -// Unmarshal parses the Avro encoded data and stores the result in the value -// pointed to by v. If v is nil or not a pointer, Unmarshal returns an error. -// Note that arrays and maps are unmarshaled into slices and maps with untyped -// values (i.e. []any and map[string]any). This is a limitation of the Avro -// library used for encoding/decoding the payload. -func (s *Schema) Unmarshal(b []byte, v any) error { - err := avro.Unmarshal(s.schema, b, v) - if err != nil { - return err - } - err = s.unionResolver.AfterUnmarshal(v) - if err != nil { - return err - } - return nil -} - -// String returns the canonical form of the schema. -func (s *Schema) String() string { - return s.schema.String() -} - -// Sort fields in the schema. 
It can be used in tests to ensure the schemas can -// be compared. -func (s *Schema) Sort() { - traverseSchema(s.schema, sortFn) -} - -// Parse parses a schema string. -func Parse(text string) (*Schema, error) { - schema, err := avro.Parse(text) - if err != nil { - return nil, cerrors.Errorf("could not parse avro schema: %w", err) - } - return &Schema{ - schema: schema, - unionResolver: NewUnionResolver(schema), - }, nil -} - -// SchemaForType uses reflection to extract an Avro schema from v. Maps are -// regarded as structs. -func SchemaForType(v any) (*Schema, error) { - schema, err := extractor{}.Extract(v) - if err != nil { - return nil, err - } - traverseSchema(schema, sortFn) - return &Schema{ - schema: schema, - unionResolver: NewUnionResolver(schema), - }, nil -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema_test.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema_test.go deleted file mode 100644 index 5281bfeed..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/schema_test.go +++ /dev/null @@ -1,658 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "fmt" - "testing" - "time" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/hamba/avro/v2" - "github.com/matryer/is" -) - -func TestSchema_MarshalUnmarshal(t *testing.T) { - now := time.Now().UTC() - - testCases := []struct { - name string - // haveValue is the value we use to extract the schema and which gets marshaled - haveValue any - // wantValue is the expected value we get when haveValue gets marshaled and unmarshaled - wantValue any - // wantSchema is the schema expected to be extracted from haveValue - wantSchema avro.Schema - }{{ - name: "boolean", - haveValue: true, - wantValue: true, - wantSchema: avro.NewPrimitiveSchema(avro.Boolean, nil), - }, { - name: "boolean ptr (false)", - haveValue: func() *bool { var v bool; return &v }(), - wantValue: false, // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Boolean, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "boolean ptr (nil)", - haveValue: (*bool)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Boolean, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int", - haveValue: int(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "int ptr (0)", - haveValue: func() *int { var v int; return &v }(), - wantValue: 0, // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int ptr (nil)", - haveValue: (*int)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - 
avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int64", - haveValue: int64(1), - wantValue: int64(1), - wantSchema: avro.NewPrimitiveSchema(avro.Long, nil), - }, { - name: "int64 ptr (0)", - haveValue: func() *int64 { var v int64; return &v }(), - wantValue: int64(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Long, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int64 ptr (nil)", - haveValue: (*int64)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Long, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int32", - haveValue: int32(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "int32 ptr (0)", - haveValue: func() *int32 { var v int32; return &v }(), - wantValue: int(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int32 ptr (nil)", - haveValue: (*int32)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int16", - haveValue: int16(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "int16 ptr (0)", - haveValue: func() *int16 { var v int16; return &v }(), - wantValue: int(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int16 ptr (nil)", - haveValue: (*int16)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - 
wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int8", - haveValue: int8(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "int8 ptr (0)", - haveValue: func() *int8 { var v int8; return &v }(), - wantValue: int(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "int8 ptr (nil)", - haveValue: (*int8)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint32", - haveValue: uint32(1), - wantValue: int64(1), - wantSchema: avro.NewPrimitiveSchema(avro.Long, nil), - }, { - name: "uint32 ptr (0)", - haveValue: func() *uint32 { var v uint32; return &v }(), - wantValue: int64(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Long, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint32 ptr (nil)", - haveValue: (*uint32)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Long, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint16", - haveValue: uint16(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "uint16 ptr (0)", - haveValue: func() *uint16 { var v uint16; return &v }(), - wantValue: int(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint16 ptr 
(nil)", - haveValue: (*uint16)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint8", - haveValue: uint8(1), - wantValue: int(1), - wantSchema: avro.NewPrimitiveSchema(avro.Int, nil), - }, { - name: "uint8 ptr (0)", - haveValue: func() *uint8 { var v uint8; return &v }(), - wantValue: int(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "uint8 ptr (nil)", - haveValue: (*uint8)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "float64", - haveValue: float64(1), - wantValue: float64(1), - wantSchema: avro.NewPrimitiveSchema(avro.Double, nil), - }, { - name: "float64 ptr (0)", - haveValue: func() *float64 { var v float64; return &v }(), - wantValue: float64(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Double, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "float64 ptr (nil)", - haveValue: (*float64)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Double, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "float32", - haveValue: float32(1), - wantValue: float32(1), - wantSchema: avro.NewPrimitiveSchema(avro.Float, nil), - }, { - name: "float32 ptr (0)", - haveValue: func() *float32 { var v float32; return &v }(), - wantValue: float32(0), // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - 
[]avro.Schema{ - avro.NewPrimitiveSchema(avro.Float, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "float32 ptr (nil)", - haveValue: (*float32)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.Float, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "string", - haveValue: "1", - wantValue: "1", - wantSchema: avro.NewPrimitiveSchema(avro.String, nil), - }, { - name: "string ptr (empty)", - haveValue: func() *string { var v string; return &v }(), - wantValue: "", // ptr is unmarshalled into value - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "string ptr (nil)", - haveValue: (*string)(nil), - wantValue: nil, // when unmarshaling we get an untyped nil - wantSchema: must(avro.NewUnionSchema( - []avro.Schema{ - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "[]byte", - haveValue: []byte{1, 2, 3}, - wantValue: []byte{1, 2, 3}, - wantSchema: avro.NewPrimitiveSchema(avro.Bytes, nil), - }, { - name: "[4]byte", - haveValue: [4]byte{1, 2, 3, 4}, - wantValue: [4]byte{1, 2, 3, 4}, - wantSchema: must(avro.NewFixedSchema("record.foo", "", 4, nil)), - }, { - name: "nil", - haveValue: nil, - wantValue: nil, - wantSchema: must(avro.NewUnionSchema( // untyped nils default to nullable strings - []avro.Schema{ - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - )), - }, { - name: "[]int", - haveValue: []int{1, 2, 3}, - wantValue: []any{1, 2, 3}, - wantSchema: avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, nil)), - }, { - name: "[]any (with data)", - haveValue: []any{1, "foo"}, - wantValue: []any{1, "foo"}, - wantSchema: avro.NewArraySchema(must(avro.NewUnionSchema( - []avro.Schema{ - 
avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - ))), - }, { - name: "[]any (no data)", - haveValue: []any{}, - wantValue: []any(nil), // TODO: smells like a bug, should be []any{} - wantSchema: avro.NewArraySchema(must(avro.NewUnionSchema( // empty slice values default to nullable strings - []avro.Schema{ - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Null, nil), - }, - ))), - }, { - name: "[][]int", - haveValue: [][]int{{1}, {2, 3}}, - wantValue: []any{[]any{1}, []any{2, 3}}, - wantSchema: avro.NewArraySchema(avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, nil))), - }, { - name: "map[string]int", - haveValue: map[string]int{ - "foo": 1, - "bar": 2, - }, - wantValue: map[string]any{ // all maps are unmarshaled into map[string]any - "foo": 1, - "bar": 2, - }, - wantSchema: avro.NewMapSchema(avro.NewPrimitiveSchema(avro.Int, nil)), - }, { - name: "map[string]any (with primitive data)", - haveValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": true, - }, - wantValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": true, - }, - wantSchema: avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Boolean, nil), - }))), - }, { - name: "map[string]any (with primitive array)", - haveValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": []int{1, 2, 3}, - }, - wantValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": []any{1, 2, 3}, - }, - wantSchema: avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, nil)), - }))), - }, { - name: "map[string]any (with 
union array)", - haveValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": []int{1, 2, 3}, - "baz2": []any{"foo", true}, - }, - wantValue: map[string]any{ - "foo": "bar", - "foo2": "bar2", - "bar": 1, - "baz": []any{1, 2, 3}, - "baz2": []any{"foo", true}, - }, - wantSchema: avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewArraySchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.String, nil), - avro.NewPrimitiveSchema(avro.Boolean, nil), - }))), - }))), - }, { - name: "map[string]any (no data)", - haveValue: map[string]any{}, - wantValue: map[string]any{}, - wantSchema: avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ // empty map values default to nullable strings - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.String, nil), - }))), - }, { - name: "map[string]any (nested)", - haveValue: map[string]any{ - "foo": map[string]any{ - "bar": "baz", - "baz": 1, - }, - }, - wantValue: map[string]any{ - "foo": map[string]any{ - "bar": "baz", - "baz": 1, - }, - }, - wantSchema: avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewMapSchema(must(avro.NewUnionSchema([]avro.Schema{ - &avro.NullSchema{}, - avro.NewPrimitiveSchema(avro.Int, nil), - avro.NewPrimitiveSchema(avro.String, nil), - }))), - }))), - }, { - name: "opencdc.StructuredData", - haveValue: opencdc.StructuredData{ - "foo": "bar", - "bar": 1, - "baz": []int{1, 2, 3}, - "tz": now, - }, - wantValue: map[string]any{ // structured data is unmarshaled into a map - "foo": "bar", - "bar": 1, - "baz": []any{1, 2, 3}, - "tz": now.Truncate(time.Microsecond), // Avro cannot does not support nanoseconds - }, - wantSchema: must(avro.NewRecordSchema( - "record.foo", - "", - []*avro.Field{ - must(avro.NewField("foo", 
avro.NewPrimitiveSchema(avro.String, nil))), - must(avro.NewField("bar", avro.NewPrimitiveSchema(avro.Int, nil))), - must(avro.NewField("baz", avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, nil)))), - must(avro.NewField("tz", avro.NewPrimitiveSchema(avro.Long, avro.NewPrimitiveLogicalSchema(avro.TimestampMicros)))), - }, - )), - }} - - newRecord := func(v any) opencdc.StructuredData { - return opencdc.StructuredData{"foo": v} - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - is := is.New(t) - - // create new record with haveValue in field "foo" - haveValue := newRecord(tc.haveValue) - - // extract schema and ensure it matches the expectation - gotSchema, err := SchemaForType(haveValue) - is.NoErr(err) - - wantSchema := &Schema{ - schema: must(avro.NewRecordSchema("record", "", - []*avro.Field{must(avro.NewField("foo", tc.wantSchema))}, - )), - } - wantSchema.Sort() - gotSchema.Sort() - is.Equal(wantSchema.String(), gotSchema.String()) - - // now try to marshal the value with the schema - bytes, err := gotSchema.Marshal(haveValue) - is.NoErr(err) - - // unmarshal the bytes back into structured data and compare the value - var gotValue opencdc.StructuredData - err = gotSchema.Unmarshal(bytes, &gotValue) - is.NoErr(err) - - wantValue := newRecord(tc.wantValue) - is.Equal(wantValue, gotValue) - }) - } -} - -func TestSchemaForType_NestedStructuredData(t *testing.T) { - is := is.New(t) - - have := opencdc.StructuredData{ - "foo": "bar", - "level1": opencdc.StructuredData{ - "foo": "bar", - "level2": opencdc.StructuredData{ - "foo": "bar", - "level3": opencdc.StructuredData{ - "foo": "bar", - "regularMap": map[string]bool{}, - }, - }, - }, - } - - want := &Schema{schema: must(avro.NewRecordSchema( - "record", "", - []*avro.Field{ - must(avro.NewField("foo", avro.NewPrimitiveSchema(avro.String, nil))), - must(avro.NewField("level1", - must(avro.NewRecordSchema( - "record.level1", "", - []*avro.Field{ - must(avro.NewField("foo", 
avro.NewPrimitiveSchema(avro.String, nil))), - must(avro.NewField("level2", - must(avro.NewRecordSchema( - "record.level1.level2", "", - []*avro.Field{ - must(avro.NewField("foo", avro.NewPrimitiveSchema(avro.String, nil))), - must(avro.NewField("level3", - must(avro.NewRecordSchema( - "record.level1.level2.level3", "", - []*avro.Field{ - must(avro.NewField("foo", avro.NewPrimitiveSchema(avro.String, nil))), - must(avro.NewField("regularMap", avro.NewMapSchema( - avro.NewPrimitiveSchema(avro.Boolean, nil), - ))), - }, - )), - )), - }, - )), - )), - }, - )), - )), - }, - ))} - want.Sort() - - got, err := SchemaForType(have) - is.NoErr(err) - is.Equal(want.String(), got.String()) - - bytes, err := got.Marshal(have) - is.NoErr(err) - // only try to unmarshal to ensure there's no error, other tests assert that - // umarshaled data matches the expectations - var unmarshaled opencdc.StructuredData - err = got.Unmarshal(bytes, &unmarshaled) - is.NoErr(err) -} - -func TestSchemaForType_UnsupportedTypes(t *testing.T) { - testCases := []struct { - val any - wantErr error - }{ - // avro only supports fixed byte arrays - {val: [4]int{}, wantErr: cerrors.New("record: arrays with value type int not supported, avro only supports bytes as values")}, - {val: [4]bool{}, wantErr: cerrors.New("record: arrays with value type bool not supported, avro only supports bytes as values")}, - // avro only supports maps with string keys - {val: map[int]string{}, wantErr: cerrors.New("record: maps with key type int not supported, avro only supports strings as keys")}, - {val: map[bool]string{}, wantErr: cerrors.New("record: maps with key type bool not supported, avro only supports strings as keys")}, - // avro only supports signed integers - {val: uint64(1), wantErr: cerrors.New("record: unsupported type: uint64")}, - {val: uint(1), wantErr: cerrors.New("record: unsupported type: uint")}, - } - for _, tc := range testCases { - t.Run(fmt.Sprintf("%T", tc.val), func(t *testing.T) { - is := 
is.New(t) - _, err := SchemaForType(tc.val) - is.True(err != nil) - is.Equal(err.Error(), tc.wantErr.Error()) - }) - } -} - -func must[T any](f T, err error) T { - if err != nil { - panic(err) - } - return f -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/traverse.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/traverse.go deleted file mode 100644 index 7beb0c427..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/traverse.go +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "fmt" - "reflect" - "sort" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/hamba/avro/v2" -) - -type ( - // path represents a path from the root to a certain type in an avro schema. - path []leg - // leg is a single leg of a path. - leg struct { - schema avro.Schema - field *avro.Field - } -) - -// traverseSchema is a utility for traversing an avro schema and executing fn on -// every schema in the tree. 
-func traverseSchema(s avro.Schema, fn func(path)) { - var traverse func(avro.Schema, path) - traverse = func(s avro.Schema, p path) { - p = append(p, leg{s, nil}) - fn(p) - - // traverse deeper into types that have nested types - switch s := s.(type) { - case *avro.MapSchema: - traverse(s.Values(), p) - case *avro.ArraySchema: - traverse(s.Items(), p) - case *avro.RefSchema: - traverse(s.Schema(), p) - case *avro.RecordSchema: - fields := s.Fields() - p = p[:len(p)-1] - for _, field := range fields { - p = append(p, leg{s, field}) - traverse(field.Type(), p) - p = p[:len(p)-1] - } - case *avro.UnionSchema: - for _, st := range s.Types() { - traverse(st, p) - } - } - } - traverse(s, nil) -} - -// sortFn can be passed to traverse to deterministically sort fields in every -// record and types in every union. -func sortFn(p path) { - switch s := p[len(p)-1].schema.(type) { - case *avro.RecordSchema: - fields := s.Fields() - sort.Slice(fields, func(i, j int) bool { - return fields[i].Name() < fields[j].Name() - }) - case *avro.UnionSchema: - schemas := s.Types() - sort.Slice(schemas, func(i, j int) bool { - return schemas[i].String() < schemas[j].String() - }) - } -} - -// traverseValue is a utility to traverse val down to the path and call fn with -// all values found at the end of the path. If hasEncodedUnions is set to true, -// any map and array with a union type is expected to contain a map[string]any -// with a single key representing the name of the type it contains -// (e.g. {"int": 1}). -// If the value structure does not match the path p, traverseValue returns an -// error. 
-// -//nolint:gocyclo // need to switch on avro type and have a case for each type -func traverseValue(val any, p path, hasEncodedUnions bool, fn func(v any)) error { - var traverse func(any, int) error - traverse = func(val any, index int) error { - if index == len(p)-1 { - // reached the end of the path, call fn - fn(val) - return nil - } - if val == nil { - return nil // can't traverse further, not an error though - } - switch l := p[index]; l.schema.Type() { - case avro.Record: - switch val := val.(type) { - case map[string]any: - return traverse(val[l.field.Name()], index+1) - case opencdc.StructuredData: - return traverse(val[l.field.Name()], index+1) - case *map[string]any: - return traverse(*val, index) // traverse value - case *opencdc.StructuredData: - return traverse(*val, index) // traverse value - } - return newUnexpectedTypeError(avro.Record, map[string]any{}, val) - case avro.Array: - valArr, ok := val.([]any) - if !ok { - return newUnexpectedTypeError(avro.Array, []any{}, val) - } - for _, item := range valArr { - if err := traverse(item, index+1); err != nil { - return err - } - } - case avro.Map: - valMap, ok := val.(map[string]any) - if !ok { - return newUnexpectedTypeError(avro.Map, map[string]any{}, val) - } - for _, v := range valMap { - if err := traverse(v, index+1); err != nil { - return err - } - } - case avro.Ref: - // ignore ref and go deeper - return traverse(val, index+1) - case avro.Union: - if hasEncodedUnions && index > 0 && - (p[index-1].schema.Type() == avro.Map || p[index-1].schema.Type() == avro.Array) { - // it's a union value encoded as a map, traverse it - valMap, ok := val.(map[string]any) - if !ok { - return newUnexpectedTypeError(avro.Union, map[string]any{}, val) - } - if len(valMap) != 1 { - return cerrors.Errorf("expected single value encoded as a map, got %d elements", len(valMap)) - } - for _, v := range valMap { - return traverse(v, index+1) // there's only one value, return - } - } - - // values are encoded 
normally, skip union - err := traverse(val, index+1) - var uterr *unexpectedTypeError - if cerrors.As(err, &uterr) { - // We allow unexpected type errors, we could be traversing a - // different branch in the union type that does not have the - // same structure. - return nil - } - return err - default: - return cerrors.Errorf("unexpected avro type %s, can not traverse deeper", l.schema.Type()) - } - return nil - } - return traverse(val, 0) -} - -type unexpectedTypeError struct { - avroType avro.Type - expectedGoType string - actualGoType string -} - -func newUnexpectedTypeError(avroType avro.Type, expected any, actual any) *unexpectedTypeError { - return &unexpectedTypeError{ - avroType: avroType, - expectedGoType: reflect.TypeOf(expected).String(), - actualGoType: reflect.TypeOf(actual).String(), - } -} - -func (e *unexpectedTypeError) Error() string { - return fmt.Sprintf("expected Go type %s for avro type %s, got %s", e.expectedGoType, e.avroType, e.actualGoType) -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union.go deleted file mode 100644 index 6c420ca39..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union.go +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "reflect" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/hamba/avro/v2" - "github.com/modern-go/reflect2" -) - -// UnionResolver provides hooks before marshaling and after unmarshaling a value -// with an Avro schema, which make sure that values under the schema type Union -// are in the correct shape (see https://github.com/hamba/avro#unions). -// NB: It currently supports union types nested in maps, but not nested in -// slices. For example, hooks will not work for values like []any{[]any{"foo"}}. -type UnionResolver struct { - mapUnionPaths []path - arrayUnionPaths []path - nullUnionPaths []path - resolver *avro.TypeResolver -} - -// NewUnionResolver takes a schema and extracts the paths to all maps and arrays -// with union types. With this information the resolver can traverse the values -// in BeforeMarshal and AfterUnmarshal directly to the value that needs to be -// substituted. 
-func NewUnionResolver(schema avro.Schema) UnionResolver { - var mapUnionPaths []path - var arrayUnionPaths []path - var nullUnionPaths []path - // traverse the schema and extract paths to all maps and arrays with a union - // as the value type - traverseSchema(schema, func(p path) { - switch { - case isMapUnion(p[len(p)-1].schema): - // path points to a map with a union type, copy and store it - pCopy := make(path, len(p)) - copy(pCopy, p) - mapUnionPaths = append(mapUnionPaths, pCopy) - case isArrayUnion(p[len(p)-1].schema): - // path points to an array with a union type, copy and store it - pCopy := make(path, len(p)) - copy(pCopy, p) - arrayUnionPaths = append(arrayUnionPaths, pCopy) - case isNullUnion(p[len(p)-1].schema): - // path points to a null union, copy and store it - pCopy := make(path, len(p)-1) - copy(pCopy, p[:len(p)-1]) - nullUnionPaths = append(nullUnionPaths, pCopy) - } - }) - return UnionResolver{ - mapUnionPaths: mapUnionPaths, - arrayUnionPaths: arrayUnionPaths, - nullUnionPaths: nullUnionPaths, - resolver: avro.NewTypeResolver(), - } -} - -// AfterUnmarshal traverses the value using the schema and finds all values that -// have the Avro type Union. Those values are unmarshaled into a map with a -// single key that contains the name of the type -// (e.g. map[string]any{"string":"foo"}). This function takes that map and -// extracts the actual value from it (e.g. "foo"). -func (r UnionResolver) AfterUnmarshal(val any) error { - if len(r.mapUnionPaths) == 0 && - len(r.arrayUnionPaths) == 0 && - len(r.nullUnionPaths) == 0 { - return nil // shortcut - } - - substitutions, err := r.afterUnmarshalMapSubstitutions(val, nil) - if err != nil { - return err - } - substitutions, err = r.afterUnmarshalArraySubstitutions(val, substitutions) - if err != nil { - return err - } - substitutions, err = r.afterUnmarshalNullUnionSubstitutions(val, substitutions) - if err != nil { - return err - } - - // We now have a list of substitutions, simply apply them. 
- for _, sub := range substitutions { - sub.substitute() - } - return nil -} - -func (r UnionResolver) afterUnmarshalMapSubstitutions(val any, substitutions []substitution) ([]substitution, error) { - for _, p := range r.mapUnionPaths { - // first collect all maps that have a union type in the schema - var maps []map[string]any - err := traverseValue(val, p, true, func(v any) { - if mapUnion, ok := v.(map[string]any); ok { - maps = append(maps, mapUnion) - } - }) - if err != nil { - return nil, err - } - - // Loop through collected maps and collect all substitutions. These maps - // contain values encoded as maps with a single key:value pair, where - // key is the type name (e.g. {"int":1}). We want to replace all these - // maps with the actual value (e.g. 1). - // We don't replace them in the loop, because we want to make sure all - // maps actually contain only 1 value. - for i, mapUnion := range maps { - for k, v := range mapUnion { - if v == nil { - // do no change nil values - continue - } - vmap, ok := v.(map[string]any) - if !ok { - return nil, cerrors.Errorf("expected map[string]any, got %T", v) - } - if len(vmap) != 1 { - return nil, cerrors.Errorf("expected single value encoded as a map, got %d elements", len(vmap)) - } - - // this is a map with a single value, store the substitution - for _, actualVal := range vmap { - substitutions = append(substitutions, mapSubstitution{ - m: maps[i], - key: k, - val: actualVal, - }) - break - } - } - } - } - return substitutions, nil -} - -func (r UnionResolver) afterUnmarshalArraySubstitutions(val any, substitutions []substitution) ([]substitution, error) { - for _, p := range r.arrayUnionPaths { - // first collect all arrays that have a union type in the schema - var arrays [][]any - err := traverseValue(val, p, true, func(v any) { - if arrayUnion, ok := v.([]any); ok { - arrays = append(arrays, arrayUnion) - } - }) - if err != nil { - return nil, err - } - - // Loop through collected arrays and collect all 
substitutions. These - // arrays contain values encoded as maps with a single key:value pair, - // where key is the type name (e.g. {"int":1}). We want to replace all - // these maps with the actual value (e.g. 1). - // We don't replace them in the loop, because we want to make sure all - // maps actually contain only 1 value. - for i, arrayUnion := range arrays { - for index, v := range arrayUnion { - if v == nil { - // do no change nil values - continue - } - vmap, ok := v.(map[string]any) - if !ok { - return nil, cerrors.Errorf("expected map[string]any, got %T", v) - } - if len(vmap) != 1 { - return nil, cerrors.Errorf("expected single value encoded as a map, got %d elements", len(vmap)) - } - - // this is a map with a single value, store the substitution - for _, actualVal := range vmap { - substitutions = append(substitutions, arraySubstitution{ - a: arrays[i], - index: index, - val: actualVal, - }) - break - } - } - } - } - return substitutions, nil -} - -func (r UnionResolver) afterUnmarshalNullUnionSubstitutions(val any, substitutions []substitution) ([]substitution, error) { - for _, p := range r.nullUnionPaths { - // first collect all values that are nullable - var maps []map[string]any - err := traverseValue(val, p, true, func(v any) { - switch v := v.(type) { - case map[string]any: - maps = append(maps, v) - case *map[string]any: - maps = append(maps, *v) - case *opencdc.StructuredData: - maps = append(maps, *v) - } - }) - if err != nil { - return nil, err - } - - // Loop through collected maps and collect all substitutions. These maps - // contain values encoded as maps with a single key:value pair, where - // key is the type name (e.g. {"int":1}). We want to replace all these - // maps with the actual value (e.g. 1). - // We don't replace them in the loop, because we want to make sure all - // maps actually contain only 1 value. 
- for i, mapUnion := range maps { - for k, v := range mapUnion { - if v == nil { - // do no change nil values - continue - } - vmap, ok := v.(map[string]any) - if !ok { - // if the value is not a map, it's not a nil value - continue - } - if len(vmap) != 1 { - return nil, cerrors.Errorf("expected single value encoded as a map, got %d elements", len(vmap)) - } - - // this is a map with a single value, store the substitution - for _, actualVal := range vmap { - substitutions = append(substitutions, mapSubstitution{ - m: maps[i], - key: k, - val: actualVal, - }) - break - } - } - } - } - return substitutions, nil -} - -// BeforeMarshal traverses the value using the schema and finds all values that -// have the Avro type Union. Those values need to be changed to a map with a -// single key that contains the name of the type. This function takes that value -// (e.g. "foo") and hoists it into a map (e.g. map[string]any{"string":"foo"}). -func (r UnionResolver) BeforeMarshal(val any) error { - if len(r.mapUnionPaths) == 0 && len(r.arrayUnionPaths) == 0 { - return nil // shortcut - } - - substitutions, err := r.beforeMarshalMapSubstitutions(val, nil) - if err != nil { - return err - } - substitutions, err = r.beforeMarshalArraySubstitutions(val, substitutions) - if err != nil { - return err - } - - // We now have a list of substitutions, simply apply them. 
- for _, sub := range substitutions { - sub.substitute() - } - return nil -} - -func (r UnionResolver) beforeMarshalMapSubstitutions(val any, substitutions []substitution) ([]substitution, error) { - for _, p := range r.mapUnionPaths { - mapSchema, ok := p[len(p)-1].schema.(*avro.MapSchema) - if !ok { - return nil, cerrors.Errorf("expected *avro.MapSchema, got %T", p[len(p)-1].schema) - } - unionSchema, ok := mapSchema.Values().(*avro.UnionSchema) - if !ok { - return nil, cerrors.Errorf("expected *avro.UnionSchema, got %T", mapSchema.Values()) - } - - // first collect all maps that have a union type in the schema - var maps []map[string]any - err := traverseValue(val, p, false, func(v any) { - if mapUnion, ok := v.(map[string]any); ok { - maps = append(maps, mapUnion) - } - }) - if err != nil { - return nil, err - } - - // Loop through collected maps and collect all substitutions. We want - // to replace all non-nil values in these maps with maps that contain a - // single value, the key corresponds to the resolved name. - // We don't replace them in the loop, because we want to make sure all - // type names can be resolved first. 
- for i, mapUnion := range maps { - for k, v := range mapUnion { - if v == nil { - // do no change nil values - continue - } - valTypeName, err := r.resolveNameForType(v, unionSchema) - if err != nil { - return nil, err - } - substitutions = append(substitutions, mapSubstitution{ - m: maps[i], - key: k, - val: map[string]any{valTypeName: v}, - }) - } - } - } - return substitutions, nil -} - -func (r UnionResolver) beforeMarshalArraySubstitutions(val any, substitutions []substitution) ([]substitution, error) { - for _, p := range r.arrayUnionPaths { - arraySchema, ok := p[len(p)-1].schema.(*avro.ArraySchema) - if !ok { - return nil, cerrors.Errorf("expected *avro.ArraySchema, got %T", p[len(p)-1].schema) - } - unionSchema, ok := arraySchema.Items().(*avro.UnionSchema) - if !ok { - return nil, cerrors.Errorf("expected *avro.UnionSchema, got %T", arraySchema.Items()) - } - - // first collect all array that have a union type in the schema - var arrays [][]any - err := traverseValue(val, p, false, func(v any) { - if arrayUnion, ok := v.([]any); ok { - arrays = append(arrays, arrayUnion) - } - }) - if err != nil { - return nil, err - } - - // Loop through collected arrays and collect all substitutions. We want - // to replace all non-nil values in these arrays with maps that contain a - // single value, the key corresponds to the resolved name. - // We don't replace them in the loop, because we want to make sure all - // type names can be resolved first. 
- for i, arrayUnion := range arrays { - for index, v := range arrayUnion { - if v == nil { - // do no change nil values - continue - } - valTypeName, err := r.resolveNameForType(v, unionSchema) - if err != nil { - return nil, err - } - substitutions = append(substitutions, arraySubstitution{ - a: arrays[i], - index: index, - val: map[string]any{valTypeName: v}, - }) - } - } - } - return substitutions, nil -} - -func (r UnionResolver) resolveNameForType(v any, us *avro.UnionSchema) (string, error) { - var names []string - - t := reflect2.TypeOf(v) - switch t.Kind() { - case reflect.Map: - names = []string{"map"} - case reflect.Slice: - if !t.Type1().Elem().AssignableTo(byteType) { // []byte is handled differently - names = []string{"array"} - break - } - fallthrough - default: - var err error - names, err = r.resolver.Name(t) - if err != nil { - return "", err - } - } - - for _, n := range names { - _, pos := us.Types().Get(n) - if pos > -1 { - return n, nil - } - } - return "", cerrors.Errorf("can't resolve %v in union type %v", names, us.String()) -} - -func isMapUnion(schema avro.Schema) bool { - s, ok := schema.(*avro.MapSchema) - if !ok { - return false - } - us, ok := s.Values().(*avro.UnionSchema) - if !ok { - return false - } - for _, s := range us.Types() { - // at least one of the types in the union must be a map or array for this - // to count as a map with a union type - if s.Type() == avro.Array || s.Type() == avro.Map { - return true - } - } - return false -} - -func isArrayUnion(schema avro.Schema) bool { - s, ok := schema.(*avro.ArraySchema) - if !ok { - return false - } - us, ok := s.Items().(*avro.UnionSchema) - if !ok { - return false - } - for _, s := range us.Types() { - // at least one of the types in the union must be a map or array for this - // to count as a map with a union type - if s.Type() == avro.Array || s.Type() == avro.Map { - return true - } - } - return false -} - -func isNullUnion(schema avro.Schema) bool { - s, ok := 
schema.(*avro.UnionSchema) - if !ok { - return false - } - if len(s.Types()) != 2 { - return false - } - for _, s := range s.Types() { - // at least one of the types in the union must be a map or array for this - // to count as a map with a union type - if s.Type() == avro.Null { - return true - } - } - return false -} - -type substitution interface { - substitute() -} - -type mapSubstitution struct { - m map[string]any - key string - val any -} - -func (s mapSubstitution) substitute() { s.m[s.key] = s.val } - -type arraySubstitution struct { - a []any - index int - val any -} - -func (s arraySubstitution) substitute() { s.a[s.index] = s.val } diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union_test.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union_test.go deleted file mode 100644 index eea56a1e6..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro/union_test.go +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "reflect" - "testing" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/matryer/is" -) - -func TestUnionResolver(t *testing.T) { - is := is.New(t) - - testCases := []struct { - name string - have any - want any - }{{ - name: "string", - have: "foo", - want: map[string]any{"string": "foo"}, - }, { - name: "int", - have: 123, - want: map[string]any{"int": 123}, - }, { - name: "boolean", - have: true, - want: map[string]any{"boolean": true}, - }, { - name: "double", - have: 1.23, - want: map[string]any{"double": 1.23}, - }, { - name: "float", - have: float32(1.23), - want: map[string]any{"float": float32(1.23)}, - }, { - name: "long", - have: int64(321), - want: map[string]any{"long": int64(321)}, - }, { - name: "bytes", - have: []byte{1, 2, 3, 4}, - want: map[string]any{"bytes": []byte{1, 2, 3, 4}}, - }, { - name: "null", - have: nil, - want: nil, - }, { - name: "int array", - have: []int{1, 2, 3, 4}, - want: map[string]any{"array": []int{1, 2, 3, 4}}, - }, { - name: "nil bool array", - have: []bool(nil), - want: map[string]any{"array": []bool(nil)}, - }} - - isSlice := func(a any) bool { - if a == nil { - return false - } - // returns true if the type is a slice and not a byte slice - t := reflect.TypeOf(a) - return t.Kind() == reflect.Slice && !t.Elem().AssignableTo(byteType) - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - is := is.New(t) - - newRecord := func() opencdc.StructuredData { - sd := opencdc.StructuredData{ - "foo1": tc.have, - "map1": map[string]any{ - "foo2": tc.have, - "map2": map[string]any{ - "foo3": tc.have, - }, - }, - "arr1": []any{ - tc.have, - []any{tc.have}, - }, - } - return sd - } - want := opencdc.StructuredData{ - "foo1": tc.have, // normal field shouldn't change - "map1": map[string]any{ - "foo2": tc.want, - "map2": map[string]any{ - "map": map[string]any{ - "foo3": func() any { - // if the original value is a slice, we consider - // the type a union and wrap it 
in a map, otherwise - // we keep the original value - if isSlice(tc.have) { - return tc.want - } - return tc.have - }(), - }, - }, - }, - "arr1": []any{ - tc.want, - map[string]any{ - "array": []any{ - func() any { - // if the original value is a slice, we consider - // the type a union and wrap it in a map, otherwise - // we keep the original value - if isSlice(tc.have) { - return tc.want - } - return tc.have - }(), - }, - }, - }, - } - have := newRecord() - - schema, err := SchemaForType(have) - is.NoErr(err) - mur := NewUnionResolver(schema.schema) - - // before marshal we should change the nested map - err = mur.BeforeMarshal(have) - is.NoErr(err) - is.Equal(want, have) - - // after unmarshal we should have the same record as at the start - err = mur.AfterUnmarshal(have) - is.NoErr(err) - is.Equal(newRecord(), have) - }) - } -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/decoder.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/decoder.go deleted file mode 100644 index 0555d458c..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/decoder.go +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package schemaregistry - -import ( - "context" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/lovromazgon/franz-go/pkg/sr" -) - -type Decoder struct { - client *Client - serde *sr.Serde - logger log.CtxLogger -} - -func NewDecoder(client *Client, logger log.CtxLogger, serde *sr.Serde) *Decoder { - return &Decoder{ - client: client, - serde: serde, - logger: logger.WithComponent("schemaregistry.Decoder"), - } -} - -func (d *Decoder) Decode(ctx context.Context, b opencdc.RawData) (opencdc.StructuredData, error) { - var out opencdc.StructuredData - err := d.serde.Decode(b.Bytes(), &out) - if cerrors.Is(err, sr.ErrNotRegistered) { - err = d.findAndRegisterSchema(ctx, b) - if err != nil { - return nil, err - } - // retry decoding - err = d.serde.Decode(b.Bytes(), &out) - } - if err != nil { - return nil, cerrors.Errorf("failed to decode raw data: %w", err) - } - - return out, nil -} - -func (d *Decoder) findAndRegisterSchema(ctx context.Context, b opencdc.RawData) error { - id, _, _ := d.serde.Header().DecodeID(b.Bytes()) // we know this won't throw an error since Decode didn't return ErrBadHeader - s, err := d.client.SchemaByID(ctx, id) - if err != nil { - return cerrors.Errorf("failed to get schema: %w", err) - } - sf, ok := DefaultSchemaFactories[s.Type] - if !ok { - return cerrors.Errorf("unknown schema type %q (%d)", s.Type.String(), s.Type) - } - schema, err := sf.Parse(s.Schema) - if err != nil { - return cerrors.Errorf("failed to parse schema: %w", err) - } - - d.serde.Register( - id, - opencdc.StructuredData{}, - sr.EncodeFn(encodeFn(schema, sr.SubjectSchema{ID: id})), - sr.DecodeFn(decodeFn(schema, sr.SubjectSchema{ID: id})), - ) - return nil -} - -func decodeFn(schema Schema, ss sr.SubjectSchema) func(b []byte, a any) error { - return func(b []byte, a any) error { - err := schema.Unmarshal(b, a) - if err != nil { - return 
cerrors.Errorf("failed to unmarshal data with schema (ID: %v, subject: %v, version: %v): %w", ss.ID, ss.Subject, ss.Version, err) - } - return nil - } -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder.go deleted file mode 100644 index 6eef353db..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/encoder.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package schemaregistry - -import ( - "context" - - "github.com/conduitio/conduit-commons/opencdc" - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/lovromazgon/franz-go/pkg/sr" -) - -type Encoder struct { - client *Client - serde *sr.Serde - logger log.CtxLogger - - SchemaStrategy -} - -type SchemaStrategy interface { - GetSchema(context.Context, *Client, log.CtxLogger, opencdc.StructuredData) (Schema, sr.SubjectSchema, error) -} - -func NewEncoder(client *Client, logger log.CtxLogger, serde *sr.Serde, strategy SchemaStrategy) *Encoder { - return &Encoder{ - client: client, - serde: serde, - logger: logger.WithComponent("schemaregistry.Encoder"), - SchemaStrategy: strategy, - } -} - -func (e *Encoder) Encode(ctx context.Context, sd opencdc.StructuredData) (opencdc.RawData, error) { - s, ss, err := e.GetSchema(ctx, e.client, e.logger, sd) - if err != nil { - return opencdc.RawData{}, cerrors.Errorf("failed to get schema: %w", err) - } - - b, err := e.serde.Encode(sd, sr.ID(ss.ID)) - if cerrors.Is(err, sr.ErrNotRegistered) { - // TODO note that we need to register specific indexes when adding support for protobuf - e.serde.Register( - ss.ID, - opencdc.StructuredData{}, - sr.EncodeFn(encodeFn(s, ss)), - sr.DecodeFn(decodeFn(s, ss)), - ) - - // try to encode again - b, err = e.serde.Encode(sd, sr.ID(ss.ID)) - } - if err != nil { - return opencdc.RawData{}, cerrors.Errorf("failed to encode data: %w", err) - } - return opencdc.RawData(b), nil -} - -type ExtractAndUploadSchemaStrategy struct { - Type sr.SchemaType - Subject string -} - -func (str ExtractAndUploadSchemaStrategy) GetSchema(ctx context.Context, client *Client, _ log.CtxLogger, sd opencdc.StructuredData) (Schema, sr.SubjectSchema, error) { - sf, ok := DefaultSchemaFactories[str.Type] - if !ok { - return nil, sr.SubjectSchema{}, cerrors.Errorf("unknown schema type %q (%d)", str.Type.String(), str.Type) - } - - s, err := 
sf.SchemaForType(sd) - if err != nil { - return nil, sr.SubjectSchema{}, cerrors.Errorf("could not extract avro schema: %w", err) - } - - ss, err := client.CreateSchema(ctx, str.Subject, sr.Schema{ - Schema: s.String(), - Type: str.Type, - References: nil, - }) - if err != nil { - return nil, sr.SubjectSchema{}, cerrors.Errorf("could not create schema: %w", err) - } - - return s, ss, nil -} - -type DownloadSchemaStrategy struct { - Subject string - // TODO add support for specifying "latest" - https://github.com/ConduitIO/conduit/issues/1095 - Version int -} - -func (str DownloadSchemaStrategy) GetSchema(ctx context.Context, client *Client, _ log.CtxLogger, _ opencdc.StructuredData) (Schema, sr.SubjectSchema, error) { - // fetch schema from registry - ss, err := client.SchemaBySubjectVersion(ctx, str.Subject, str.Version) - if err != nil { - return nil, sr.SubjectSchema{}, cerrors.Errorf("could not fetch schema with subject %q and version %q: %w", str.Subject, str.Version, err) - } - - sf, ok := DefaultSchemaFactories[ss.Type] - if !ok { - return nil, sr.SubjectSchema{}, cerrors.Errorf("unknown schema type %q (%d)", ss.Type.String(), ss.Type) - } - - s, err := sf.Parse(ss.Schema.Schema) - if err != nil { - return nil, sr.SubjectSchema{}, err - } - return s, ss, nil -} - -func encodeFn(schema Schema, ss sr.SubjectSchema) func(v any) ([]byte, error) { - return func(v any) ([]byte, error) { - b, err := schema.Marshal(v) - if err != nil { - return nil, cerrors.Errorf("failed to marshal data with schema (ID: %v, subject: %v, version: %v): %w", ss.ID, ss.Subject, ss.Version, err) - } - return b, nil - } -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/fake.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/fake.go deleted file mode 100644 index 149e5c6cc..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/fake.go +++ /dev/null @@ -1,400 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !integration - -package schemaregistry - -import ( - "fmt" - "net" - "net/http" - "net/http/httptest" - "strconv" - "strings" - "sync" - "testing" - - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal" - "github.com/goccy/go-json" - "github.com/lovromazgon/franz-go/pkg/sr" -) - -var ( - fakeServerByTest = make(map[string]*httptest.Server) - fakeServerByTestLock sync.Mutex -) - -// ExampleSchemaRegistryURL creates a fake in-memory schema registry server and -// returns its address and a cleanup function which should be executed in a -// deferred call. -// -// This method is only used if examples are run without --tags=integration. It -// is meant as a utility to allow faster iteration when developing, please run -// integration tests to ensure the code works with a real schema registry. -func ExampleSchemaRegistryURL(exampleName string, port int) (string, func()) { - // discard all schema registry logs in examples - logf := func(_ string, _ ...any) {} - return fakeSchemaRegistryURL(exampleName, logf, port) -} - -// TestSchemaRegistryURL creates a fake in-memory schema registry server and -// returns its address. -// -// This method is only used if the tests are run without -// --tags=integration. 
It is meant as a utility to allow faster iteration when -// developing, please run integration tests to ensure the code works with a real -// schema registry. -func TestSchemaRegistryURL(t testing.TB) string { - url, cleanup := fakeSchemaRegistryURL(t.Name(), t.Logf, 0) - t.Cleanup(cleanup) - return url -} - -func fakeSchemaRegistryURL(name string, logf func(format string, args ...any), port int) (string, func()) { - fakeServerByTestLock.Lock() - defer fakeServerByTestLock.Unlock() - - srv := fakeServerByTest[name] - cleanup := func() {} - if srv == nil { - srv = httptest.NewUnstartedServer(newFakeServer(logf)) - if port > 0 { - // NewUnstartedServer creates a listener. Close that listener and replace - // with a custom one. - _ = srv.Listener.Close() - l, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port)) - if err != nil { - panic(fmt.Sprintf("failed starting test server on port %d: %v", port, err)) - } - srv.Listener = l - } - - srv.Start() - fakeServerByTest[name] = srv - cleanup = srv.Close - } - return srv.URL, cleanup -} - -const ( - errorCodeSubjectNotFound = 40401 - errorCodeSchemaNotFound = 40403 -) - -// fakeRegistry is a simple fake registry meant to be used in tests. It stores -// schemas in memory and supports only the basic functionality needed in our -// tests and supported by our client. 
-type fakeRegistry struct { - schemas []sr.SubjectSchema - fingerprintIDCache map[uint64]int - idSequence int - - m sync.Mutex - initOnce sync.Once -} - -func (fr *fakeRegistry) init() { - fr.initOnce.Do(func() { - fr.m.Lock() - defer fr.m.Unlock() - fr.schemas = make([]sr.SubjectSchema, 0) - fr.fingerprintIDCache = make(map[uint64]int) - }) -} - -func (fr *fakeRegistry) CreateSchema(subject string, schema sr.Schema) sr.SubjectSchema { - fr.init() - fr.m.Lock() - defer fr.m.Unlock() - - fp := internal.Rabin([]byte(schema.Schema)) - id, ok := fr.fingerprintIDCache[fp] - if ok { - // schema exists, see if subject matches - ss, ok := fr.findBySubjectID(subject, id) - if ok { - // schema exists for this subject, return it - return ss - } - } - if !ok { - // schema does not exist yet - id = fr.nextID() - } - version := fr.nextVersion(subject) - - ss := sr.SubjectSchema{ - Subject: subject, - Version: version, - ID: id, - Schema: schema, - } - - fr.schemas = append(fr.schemas, ss) - fr.fingerprintIDCache[fp] = id - - return ss -} - -func (fr *fakeRegistry) SchemaByID(id int) (sr.Schema, bool) { - fr.init() - fr.m.Lock() - defer fr.m.Unlock() - - s, ok := fr.findOneByID(id) - return s, ok -} - -func (fr *fakeRegistry) SchemaBySubjectVersion(subject string, version int) (sr.SubjectSchema, bool) { - fr.init() - fr.m.Lock() - defer fr.m.Unlock() - - return fr.findBySubjectVersion(subject, version) -} - -func (fr *fakeRegistry) SubjectVersionsByID(id int) []sr.SubjectSchema { - fr.init() - fr.m.Lock() - defer fr.m.Unlock() - - return fr.findAllByID(id) -} - -func (fr *fakeRegistry) nextID() int { - fr.idSequence++ - return fr.idSequence -} - -func (fr *fakeRegistry) nextVersion(subject string) int { - return len(fr.findBySubject(subject)) + 1 -} - -func (fr *fakeRegistry) findBySubject(subject string) []sr.SubjectSchema { - var sss []sr.SubjectSchema - for _, ss := range fr.schemas { - if ss.Subject == subject { - sss = append(sss, ss) - } - } - return sss -} - -func (fr 
*fakeRegistry) findOneByID(id int) (sr.Schema, bool) { - for _, ss := range fr.schemas { - if ss.ID == id { - return ss.Schema, true - } - } - return sr.Schema{}, false -} - -func (fr *fakeRegistry) findAllByID(id int) []sr.SubjectSchema { - var sss []sr.SubjectSchema - for _, ss := range fr.schemas { - if ss.ID == id { - sss = append(sss, ss) - } - } - return sss -} - -func (fr *fakeRegistry) findBySubjectID(subject string, id int) (sr.SubjectSchema, bool) { - for _, ss := range fr.schemas { - if ss.Subject == subject && ss.ID == id { - return ss, true - } - } - return sr.SubjectSchema{}, false -} - -func (fr *fakeRegistry) findBySubjectVersion(subject string, version int) (sr.SubjectSchema, bool) { - for _, ss := range fr.schemas { - if ss.Subject == subject && ss.Version == version { - return ss, true - } - } - return sr.SubjectSchema{}, false -} - -// fakeServer is a fake schema registry server. -type fakeServer struct { - fr fakeRegistry - logf func(format string, args ...any) -} - -func newFakeServer(logf func(format string, args ...any)) *fakeServer { - fs := &fakeServer{ - logf: func(format string, args ...any) { /* no op */ }, - } - if logf != nil { - fs.logf = logf - } - return fs -} - -func (fs *fakeServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { - fs.logf("%s %s", r.Method, r.RequestURI) - - var ( - id int - subject string - version int - ) - p := r.URL.Path - switch { - case fs.match(p, "/schemas/ids/+", &id) && r.Method == http.MethodGet: - fs.schemaByID(w, r, id) - case fs.match(p, "/schemas/ids/+/versions", &id) && r.Method == http.MethodGet: - fs.subjectVersionsByID(w, r, id) - case fs.match(p, "/subjects/+/versions", &subject) && r.Method == http.MethodPost: - fs.createSchema(w, r, subject) - case fs.match(p, "/subjects/+/versions/+", &subject, &version) && r.Method == http.MethodGet: - fs.schemaBySubjectVersion(w, r, subject, version) - case fs.match(p, "/config/+", &subject) && r.Method == http.MethodPut: - fs.updateConfig(w, r) - 
default: - http.NotFound(w, r) - } -} - -// match reports whether path matches the given pattern, which is a -// path with '+' wildcards wherever you want to use a parameter. Path -// parameters are assigned to the pointers in vars (len(vars) must be -// the number of wildcards), which must be of type *string or *int. -// Source: https://github.com/benhoyt/go-routing/blob/master/match/route.go -func (*fakeServer) match(path, pattern string, vars ...interface{}) bool { - for ; pattern != "" && path != ""; pattern = pattern[1:] { - switch pattern[0] { - case '+': - // '+' matches till next slash in path - slash := strings.IndexByte(path, '/') - if slash < 0 { - slash = len(path) - } - segment := path[:slash] - path = path[slash:] - switch p := vars[0].(type) { - case *string: - *p = segment - case *int: - n, err := strconv.Atoi(segment) - if err != nil || n < 0 { - return false - } - *p = n - default: - panic("vars must be *string or *int") - } - vars = vars[1:] - case path[0]: - // non-'+' pattern byte must match path byte - path = path[1:] - default: - return false - } - } - return path == "" && pattern == "" -} - -func (fs *fakeServer) createSchema(w http.ResponseWriter, r *http.Request, subject string) { - // POST /subjects/{subject}/versions => returns ID - defer r.Body.Close() - var s sr.Schema - err := json.NewDecoder(r.Body).Decode(&s) - if err != nil { - fs.error(w, http.StatusInternalServerError, err) - return - } - - ss := fs.fr.CreateSchema(subject, s) - fs.json(w, map[string]any{"id": ss.ID}) -} - -func (fs *fakeServer) schemaBySubjectVersion(w http.ResponseWriter, _ *http.Request, subject string, version int) { - // GET /subjects/{subject}/versions/{version} - ss, ok := fs.fr.SchemaBySubjectVersion(subject, version) - if !ok { - fs.errorWithCode(w, http.StatusNotFound, errorCodeSubjectNotFound, cerrors.New("subject not found")) - return - } - fs.json(w, ss) -} - -func (fs *fakeServer) schemaByID(w http.ResponseWriter, _ *http.Request, id int) { - // GET 
/schemas/ids/{id} - s, ok := fs.fr.SchemaByID(id) - if !ok { - fs.errorWithCode(w, http.StatusNotFound, errorCodeSchemaNotFound, cerrors.New("schema not found")) - return - } - fs.json(w, s) -} - -func (fs *fakeServer) subjectVersionsByID(w http.ResponseWriter, _ *http.Request, id int) { - // GET /schemas/ids/{id}/versions - sss := fs.fr.SubjectVersionsByID(id) - fs.json(w, sss) -} - -func (fs *fakeServer) updateConfig(w http.ResponseWriter, r *http.Request) { - // PUT /config/{subject} - defer r.Body.Close() - var c struct { - Compatibility string `json:"compatibility"` - } - err := json.NewDecoder(r.Body).Decode(&c) - if err != nil { - fs.error(w, http.StatusInternalServerError, err) - return - } - - valid := map[string]bool{ - "BACKWARD": true, - "BACKWARD_TRANSITIVE": true, - "FORWARD": true, - "FORWARD_TRANSITIVE": true, - "FULL": true, - "FULL_TRANSITIVE": true, - "NONE": true, - }[c.Compatibility] - if !valid { - fs.errorWithCode(w, 42203, http.StatusUnprocessableEntity, cerrors.New("invalid compatibility level")) - return - } - fs.json(w, c) -} - -func (fs *fakeServer) json(w http.ResponseWriter, v any) { - b, err := json.Marshal(v) - if err != nil { - fs.error(w, http.StatusInternalServerError, err) - return - } - _, _ = w.Write(b) -} - -func (fs *fakeServer) error(w http.ResponseWriter, status int, err error) { - fs.errorWithCode(w, status, 50001, err) -} - -func (fs *fakeServer) errorWithCode(w http.ResponseWriter, status int, code int, err error) { - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(map[string]any{ - "error_code": code, - "message": err.Error(), - }) -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache.go deleted file mode 100644 index a46baec8d..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package internal - -import ( - "fmt" - "sync" - - "github.com/lovromazgon/franz-go/pkg/sr" - "github.com/twmb/go-cache/cache" -) - -type ( - subjectVersion string - subjectFingerprint string -) - -func newSubjectVersion(subject string, version int) subjectVersion { - return subjectVersion(fmt.Sprintf("%s:%d", subject, version)) -} - -func newSubjectFingerprint(subject string, text string) subjectFingerprint { - fingerprint := Rabin([]byte(text)) - return subjectFingerprint(fmt.Sprintf("%s:%d", subject, fingerprint)) -} - -// SchemaCache caches schemas by ID, their subject/fingerprint and -// subject/version. Fingerprints are calculated using the Rabin algorithm. 
-type SchemaCache struct { - initOnce sync.Once - - idCache *cache.Cache[int, sr.Schema] - subjectFingerprintCache *cache.Cache[subjectFingerprint, sr.SubjectSchema] - subjectVersionCache *cache.Cache[subjectVersion, sr.SubjectSchema] -} - -func (c *SchemaCache) init() { - c.initOnce.Do(func() { - c.idCache = cache.New[int, sr.Schema]() - c.subjectFingerprintCache = cache.New[subjectFingerprint, sr.SubjectSchema]() - c.subjectVersionCache = cache.New[subjectVersion, sr.SubjectSchema]() - }) -} - -func (c *SchemaCache) GetByID(id int, miss func() (sr.Schema, error)) (sr.Schema, error) { - c.init() - s, err, _ := c.idCache.Get(id, miss) - return s, err -} - -func (c *SchemaCache) GetBySubjectText(subject string, text string, miss func() (sr.SubjectSchema, error)) (sr.SubjectSchema, error) { - c.init() - sfp := newSubjectFingerprint(subject, text) - ss, err, _ := c.subjectFingerprintCache.Get(sfp, func() (sr.SubjectSchema, error) { - ss, err := miss() - if err != nil { - return ss, err - } - c.idCache.Set(ss.ID, ss.Schema) - c.subjectVersionCache.Set(newSubjectVersion(ss.Subject, ss.Version), ss) - return ss, nil - }) - return ss, err -} - -func (c *SchemaCache) GetBySubjectVersion(subject string, version int, miss func() (sr.SubjectSchema, error)) (sr.SubjectSchema, error) { - c.init() - sv := newSubjectVersion(subject, version) - ss, err, _ := c.subjectVersionCache.Get(sv, func() (sr.SubjectSchema, error) { - ss, err := miss() - if err != nil { - return ss, err - } - c.idCache.Set(ss.ID, ss.Schema) - c.subjectFingerprintCache.Set(newSubjectFingerprint(ss.Subject, ss.Schema.Schema), ss) - return ss, nil - }) - return ss, err -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache_test.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache_test.go deleted file mode 100644 index 40ab03c53..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/cache_test.go +++ /dev/null @@ -1,186 +0,0 @@ -// 
Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package internal - -import ( - "sync/atomic" - "testing" - - "github.com/conduitio/conduit/pkg/foundation/cerrors" - "github.com/lovromazgon/franz-go/pkg/sr" - "github.com/matryer/is" -) - -func TestSchemaCache_GetByID(t *testing.T) { - is := is.New(t) - cache := &SchemaCache{} - - id := 1 - want := sr.Schema{ - Schema: `"string"`, - Type: sr.TypeAvro, - References: nil, - } - var missCount atomic.Int32 - got, err := cache.GetByID(id, func() (sr.Schema, error) { - missCount.Add(1) - return want, nil - }) - - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - - t.Run("GetByID is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetByID(id, nil) - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - }) - - // other methods can't be cached, as the subject and version are unknown -} - -func TestSchemaCache_GetBySubjectText(t *testing.T) { - cache := &SchemaCache{} - - want := sr.SubjectSchema{ - Subject: "test", - Version: 1, - ID: 2, - Schema: sr.Schema{ - Schema: `"string"`, - Type: sr.TypeAvro, - References: nil, - }, - } - - is := is.New(t) - var missCount atomic.Int32 - got, err := cache.GetBySubjectText(want.Subject, want.Schema.Schema, func() (sr.SubjectSchema, error) { - missCount.Add(1) - return want, nil - }) - is.NoErr(err) - is.Equal(got, want) - is.Equal(missCount.Load(), int32(1)) - - t.Run("GetByID 
is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetByID(want.ID, nil) - is.NoErr(err) - is.Equal(want.Schema, got) - is.Equal(missCount.Load(), int32(1)) - }) - - t.Run("GetBySubjectText is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectText(want.Subject, want.Schema.Schema, nil) - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - }) - - t.Run("GetBySubjectVersion is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectVersion(want.Subject, want.Version, nil) - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - }) -} - -func TestSchemaCache_GetBySubjectVersion(t *testing.T) { - cache := &SchemaCache{} - - want := sr.SubjectSchema{ - Subject: "test", - Version: 1, - ID: 2, - Schema: sr.Schema{ - Schema: `"string"`, - Type: sr.TypeAvro, - References: nil, - }, - } - - is := is.New(t) - var missCount atomic.Int32 - got, err := cache.GetBySubjectVersion(want.Subject, want.Version, func() (sr.SubjectSchema, error) { - missCount.Add(1) - return want, nil - }) - is.NoErr(err) - is.Equal(got, want) - is.Equal(missCount.Load(), int32(1)) - - t.Run("GetByID is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetByID(want.ID, nil) - is.NoErr(err) - is.Equal(want.Schema, got) - is.Equal(missCount.Load(), int32(1)) - }) - - t.Run("GetBySubjectText is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectText(want.Subject, want.Schema.Schema, nil) - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - }) - - t.Run("GetBySubjectVersion is cached", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectVersion(want.Subject, want.Version, nil) - is.NoErr(err) - is.Equal(want, got) - is.Equal(missCount.Load(), int32(1)) - }) -} - -func TestSchemaCache_Miss(t *testing.T) { - cache := &SchemaCache{} - var want sr.SubjectSchema - wantErr := cerrors.New("test error") - 
- t.Run("GetByID", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetByID(1, func() (sr.Schema, error) { - return sr.Schema{}, wantErr - }) - is.Equal(err, wantErr) - is.Equal(want.Schema, got) - }) - - t.Run("GetBySubjectVersion", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectVersion("test", 1, func() (sr.SubjectSchema, error) { - return sr.SubjectSchema{}, wantErr - }) - is.Equal(err, wantErr) - is.Equal(want, got) - }) - - t.Run("GetBySubjectText", func(t *testing.T) { - is := is.New(t) - got, err := cache.GetBySubjectText("foo", `"string"`, func() (sr.SubjectSchema, error) { - return sr.SubjectSchema{}, wantErr - }) - is.Equal(err, wantErr) - is.Equal(want, got) - }) -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin.go deleted file mode 100644 index 47029f23f..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package internal - -const rabinEmpty = uint64(0xc15d213aa4d7a795) - -// rabinTable is initialized in init and used to compute the CRC-64-AVRO -// fingerprint. 
-var rabinTable [256]uint64 - -func init() { - rabinTable = newRabinFingerprintTable() -} - -// newRabinFingerprintTable initializes the fingerprint table according to the -// spec: https://avro.apache.org/docs/1.8.2/spec.html#schema_fingerprints -func newRabinFingerprintTable() [256]uint64 { - fpTable := [256]uint64{} - for i := 0; i < 256; i++ { - fp := uint64(i) - for j := 0; j < 8; j++ { - fp = (fp >> 1) ^ (rabinEmpty & -(fp & 1)) - } - fpTable[i] = fp - } - return fpTable -} - -// Rabin creates a Rabin fingerprint according to the spec: -// https://avro.apache.org/docs/1.8.2/spec.html#schema_fingerprints -func Rabin(buf []byte) uint64 { - fp := rabinEmpty - for i := 0; i < len(buf); i++ { - fp = (fp >> 8) ^ rabinTable[(byte(fp)^buf[i])&0xff] - } - return fp -} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/schema.go b/pkg/plugin/processor/builtin/impl/avro/schemaregistry/schema.go deleted file mode 100644 index cd00d1df2..000000000 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/schema.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright © 2023 Meroxa, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package schemaregistry - -import ( - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry/avro" - "github.com/lovromazgon/franz-go/pkg/sr" -) - -type Schema interface { - // Marshal returns the encoded representation of v. 
- Marshal(v any) ([]byte, error) - // Unmarshal parses encoded data and stores the result in the value pointed - // to by v. If v is nil or not a pointer, Unmarshal returns an error. - Unmarshal(b []byte, v any) error - // String returns the textual representation of the schema. - String() string -} - -type SchemaFactory struct { - // Parse takes the textual representation of the schema and parses it into - // a Schema. - Parse func(string) (Schema, error) - // SchemaForType returns a Schema that matches the structure of v. - SchemaForType func(v any) (Schema, error) -} - -var DefaultSchemaFactories = map[sr.SchemaType]SchemaFactory{ - avro.Type: { - Parse: func(s string) (Schema, error) { return avro.Parse(s) }, - SchemaForType: func(v any) (Schema, error) { return avro.SchemaForType(v) }, - }, -} diff --git a/pkg/plugin/processor/builtin/impl/base64/decode_paramgen.go b/pkg/plugin/processor/builtin/impl/base64/decode_paramgen.go index a70272526..b5e698990 100644 --- a/pkg/plugin/processor/builtin/impl/base64/decode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/base64/decode_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + decodeConfigField = "field" +) + func (decodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + decodeConfigField: { Default: "", Description: "Field is the reference to the target field. 
Note that it is not allowed to\nbase64 decode the `.Position` field.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/base64/encode_paramgen.go b/pkg/plugin/processor/builtin/impl/base64/encode_paramgen.go index cf6315f52..b22844786 100644 --- a/pkg/plugin/processor/builtin/impl/base64/encode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/base64/encode_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + encodeConfigField = "field" +) + func (encodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + encodeConfigField: { Default: "", Description: "Field is a reference to the target field. Note that it is not allowed to\nbase64 encode the `.Position` field.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/custom/javascript_paramgen.go b/pkg/plugin/processor/builtin/impl/custom/javascript_paramgen.go index 879601708..96d7f1b86 100644 --- a/pkg/plugin/processor/builtin/impl/custom/javascript_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/custom/javascript_paramgen.go @@ -7,15 +7,20 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + javascriptConfigScript = "script" + javascriptConfigScriptPath = "script.path" +) + func (javascriptConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "script": { + javascriptConfigScript: { Default: "", Description: "JavaScript code for this processor.\nIt needs to have a function `process()` that accept\na record and returns a record.\nThe `process()` function can either modify the input record and return it,\nor create a new record.\nIf a record needs to be filtered 
(dropped from the pipeline),\nthen the `process()` function should return `null`.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "script.path": { + javascriptConfigScriptPath: { Default: "", Description: "The path to a .js file containing the processor code.", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/error_paramgen.go b/pkg/plugin/processor/builtin/impl/error_paramgen.go index 9e4351892..7fd5a0055 100644 --- a/pkg/plugin/processor/builtin/impl/error_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/error_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + errorConfigMessage = "message" +) + func (errorConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "message": { + errorConfigMessage: { Default: "error processor triggered", Description: "Error message to be returned. This can be a Go [template](https://pkg.go.dev/text/template)\nexecuted on each [`Record`](https://pkg.go.dev/github.com/conduitio/conduit-commons/opencdc#Record)\nbeing processed.", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/field/convert_paramgen.go b/pkg/plugin/processor/builtin/impl/field/convert_paramgen.go index d4b5454bd..9ec880ef6 100644 --- a/pkg/plugin/processor/builtin/impl/field/convert_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/field/convert_paramgen.go @@ -9,9 +9,14 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + convertConfigField = "field" + convertConfigType = "type" +) + func (convertConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + convertConfigField: { Default: "", Description: "Field is the target field that should be converted.\nNote that you can only convert fields in structured data under `.Key` and\n`.Payload`.\n\nFor more information about the format, see [Referencing 
fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, @@ -20,7 +25,7 @@ func (convertConfig) Parameters() map[string]config.Parameter { config.ValidationRegex{Regex: regexp.MustCompile("^\\.(Payload|Key).*")}, }, }, - "type": { + convertConfigType: { Default: "", Description: "Type is the target field type after conversion, available options are: string, int, float, bool.", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/field/exclude_paramgen.go b/pkg/plugin/processor/builtin/impl/field/exclude_paramgen.go index 83d55b812..7e46b6d42 100644 --- a/pkg/plugin/processor/builtin/impl/field/exclude_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/field/exclude_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + excludeConfigFields = "fields" +) + func (excludeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "fields": { + excludeConfigFields: { Default: "", Description: "Fields is a comma separated list of target fields which should be excluded.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/field/rename_paramgen.go b/pkg/plugin/processor/builtin/impl/field/rename_paramgen.go index eb8f255cd..0cf1939c3 100644 --- a/pkg/plugin/processor/builtin/impl/field/rename_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/field/rename_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + renameConfigMapping = "mapping" +) + func (renameConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "mapping": { + renameConfigMapping: { Default: "", Description: "Mapping is a comma separated list of keys and values for fields and their\nnew names (keys and values are separated by colons 
\":\").\n\nFor example: `.Metadata.key:id,.Payload.After.foo:bar`.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/field/set_paramgen.go b/pkg/plugin/processor/builtin/impl/field/set_paramgen.go index dcf0b629d..0ad1389a5 100644 --- a/pkg/plugin/processor/builtin/impl/field/set_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/field/set_paramgen.go @@ -7,9 +7,14 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + setConfigField = "field" + setConfigValue = "value" +) + func (setConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + setConfigField: { Default: "", Description: "Field is the target field that will be set. Note that it is not allowed\nto set the `.Position` field.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, @@ -18,7 +23,7 @@ func (setConfig) Parameters() map[string]config.Parameter { config.ValidationExclusion{List: []string{".Position"}}, }, }, - "value": { + setConfigValue: { Default: "", Description: "Value is a Go template expression which will be evaluated and stored in `field` (e.g. 
`{{ .Payload.After }}`).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/json/decode_paramgen.go b/pkg/plugin/processor/builtin/impl/json/decode_paramgen.go index 70527ccf9..eee6b7307 100644 --- a/pkg/plugin/processor/builtin/impl/json/decode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/json/decode_paramgen.go @@ -9,9 +9,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + decodeConfigField = "field" +) + func (decodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + decodeConfigField: { Default: "", Description: "Field is a reference to the target field. Only fields that are under\n`.Key` and `.Payload` can be decoded.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/json/encode_paramgen.go b/pkg/plugin/processor/builtin/impl/json/encode_paramgen.go index ef7491b22..ca8430871 100644 --- a/pkg/plugin/processor/builtin/impl/json/encode_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/json/encode_paramgen.go @@ -9,9 +9,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + encodeConfigField = "field" +) + func (encodeConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + encodeConfigField: { Default: "", Description: "Field is a reference to the target field. 
Only fields that are under\n`.Key` and `.Payload` can be encoded.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/unwrap/debezium_paramgen.go b/pkg/plugin/processor/builtin/impl/unwrap/debezium_paramgen.go index 18f62ef54..29ad20850 100644 --- a/pkg/plugin/processor/builtin/impl/unwrap/debezium_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/unwrap/debezium_paramgen.go @@ -9,9 +9,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + debeziumConfigField = "field" +) + func (debeziumConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + debeziumConfigField: { Default: ".Payload.After", Description: "Field is a reference to the field that contains the Debezium record.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/unwrap/kafka_connect_paramgen.go b/pkg/plugin/processor/builtin/impl/unwrap/kafka_connect_paramgen.go index 9b41b9a59..223fdec00 100644 --- a/pkg/plugin/processor/builtin/impl/unwrap/kafka_connect_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/unwrap/kafka_connect_paramgen.go @@ -9,9 +9,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + kafkaConnectConfigField = "field" +) + func (kafkaConnectConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + kafkaConnectConfigField: { Default: ".Payload.After", Description: "Field is a reference to the field that contains the Kafka Connect record.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git 
a/pkg/plugin/processor/builtin/impl/unwrap/opencdc_paramgen.go b/pkg/plugin/processor/builtin/impl/unwrap/opencdc_paramgen.go index 7a3a33dfb..1ba2868e9 100644 --- a/pkg/plugin/processor/builtin/impl/unwrap/opencdc_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/unwrap/opencdc_paramgen.go @@ -7,9 +7,13 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + openCDCConfigField = "field" +) + func (openCDCConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "field": { + openCDCConfigField: { Default: ".Payload.After", Description: "Field is a reference to the field that contains the OpenCDC record.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/impl/webhook/http_paramgen.go b/pkg/plugin/processor/builtin/impl/webhook/http_paramgen.go index b695cc430..b2644c0cf 100644 --- a/pkg/plugin/processor/builtin/impl/webhook/http_paramgen.go +++ b/pkg/plugin/processor/builtin/impl/webhook/http_paramgen.go @@ -7,9 +7,23 @@ import ( "github.com/conduitio/conduit-commons/config" ) +const ( + httpConfigBackoffRetryCount = "backoffRetry.count" + httpConfigBackoffRetryFactor = "backoffRetry.factor" + httpConfigBackoffRetryMax = "backoffRetry.max" + httpConfigBackoffRetryMin = "backoffRetry.min" + httpConfigHeaders = "headers.*" + httpConfigRequestBody = "request.body" + httpConfigRequestContentType = "request.contentType" + httpConfigRequestMethod = "request.method" + httpConfigRequestUrl = "request.url" + httpConfigResponseBody = "response.body" + httpConfigResponseStatus = "response.status" +) + func (httpConfig) Parameters() map[string]config.Parameter { return map[string]config.Parameter{ - "backoffRetry.count": { + httpConfigBackoffRetryCount: { Default: "0", Description: "Maximum number of retries for an individual record when backing off following an error.", Type: 
config.ParameterTypeFloat, @@ -17,7 +31,7 @@ func (httpConfig) Parameters() map[string]config.Parameter { config.ValidationGreaterThan{V: -1}, }, }, - "backoffRetry.factor": { + httpConfigBackoffRetryFactor: { Default: "2", Description: "The multiplying factor for each increment step.", Type: config.ParameterTypeFloat, @@ -25,43 +39,43 @@ func (httpConfig) Parameters() map[string]config.Parameter { config.ValidationGreaterThan{V: 0}, }, }, - "backoffRetry.max": { + httpConfigBackoffRetryMax: { Default: "5s", Description: "The maximum waiting time before retrying.", Type: config.ParameterTypeDuration, Validations: []config.Validation{}, }, - "backoffRetry.min": { + httpConfigBackoffRetryMin: { Default: "100ms", Description: "The minimum waiting time before retrying.", Type: config.ParameterTypeDuration, Validations: []config.Validation{}, }, - "headers.*": { + httpConfigHeaders: { Default: "", Description: "Headers to add to the request, use `headers.*` to specify the header and its value (e.g. `headers.Authorization: \"Bearer key\"`).", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "request.body": { + httpConfigRequestBody: { Default: "", Description: "Specifies the body that will be sent in the HTTP request. The field accepts\na Go [templates](https://pkg.go.dev/text/template) that's evaluated using the\n[opencdc.Record](https://pkg.go.dev/github.com/conduitio/conduit-commons/opencdc#Record)\nas input. By default, the body is empty.\n\nTo send the whole record as JSON you can use `{{ toJson . 
}}`.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "request.contentType": { + httpConfigRequestContentType: { Default: "", Description: "Deprecated: use `headers.Content-Type` instead.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "request.method": { + httpConfigRequestMethod: { Default: "GET", Description: "Method is the HTTP request method to be used.", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "request.url": { + httpConfigRequestUrl: { Default: "", Description: "URL is a Go template expression for the URL used in the HTTP request, using Go [templates](https://pkg.go.dev/text/template).\nThe value provided to the template is [opencdc.Record](https://pkg.go.dev/github.com/conduitio/conduit-commons/opencdc#Record),\nso the template has access to all its fields (e.g. `.Position`, `.Key`, `.Metadata`, and so on). We also inject all template functions provided by [sprig](https://masterminds.github.io/sprig/)\nto make it easier to write templates.", Type: config.ParameterTypeString, @@ -69,13 +83,13 @@ func (httpConfig) Parameters() map[string]config.Parameter { config.ValidationRequired{}, }, }, - "response.body": { + httpConfigResponseBody: { Default: ".Payload.After", Description: "Specifies in which field should the response body be saved.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, Validations: []config.Validation{}, }, - "response.status": { + httpConfigResponseStatus: { Default: "", Description: "Specifies in which field should the response status be saved. 
If no value\nis set, then the response status will NOT be saved.\n\nFor more information about the format, see [Referencing fields](https://conduit.io/docs/processors/referencing-fields).", Type: config.ParameterTypeString, diff --git a/pkg/plugin/processor/builtin/registry.go b/pkg/plugin/processor/builtin/registry.go index 6e4024b87..ee631a9b4 100644 --- a/pkg/plugin/processor/builtin/registry.go +++ b/pkg/plugin/processor/builtin/registry.go @@ -70,7 +70,7 @@ type blueprint struct { type ProcessorPluginConstructor func(log.CtxLogger) sdk.Processor func NewRegistry(logger log.CtxLogger, constructors map[string]ProcessorPluginConstructor) *Registry { - logger = logger.WithComponent("builtin.Registry") + logger = logger.WithComponent("plugin.processor.builtin.Registry") buildInfo, ok := debug.ReadBuildInfo() if !ok { // we are using modules, build info should always be available, we are staying on the safe side @@ -82,7 +82,7 @@ func NewRegistry(logger log.CtxLogger, constructors map[string]ProcessorPluginCo plugins: loadPlugins(buildInfo, constructors), logger: logger, } - logger.Info(context.Background()).Int("count", len(r.List())).Msg("builtin plugins initialized") + logger.Info(context.Background()).Int("count", len(r.List())).Msg("builtin processor plugins initialized") return r } diff --git a/pkg/provisioning/service_test.go b/pkg/provisioning/service_test.go index 6067b54b6..8ea502eab 100644 --- a/pkg/provisioning/service_test.go +++ b/pkg/provisioning/service_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + schemaregistry "github.com/conduitio/conduit-schema-registry" "github.com/conduitio/conduit/pkg/connector" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/ctxutil" @@ -28,6 +29,7 @@ import ( "github.com/conduitio/conduit/pkg/pipeline" conn_plugin "github.com/conduitio/conduit/pkg/plugin/connector" "github.com/conduitio/conduit/pkg/plugin/connector/builtin" + 
"github.com/conduitio/conduit/pkg/plugin/connector/connutils" "github.com/conduitio/conduit/pkg/plugin/connector/standalone" proc_plugin "github.com/conduitio/conduit/pkg/plugin/processor" proc_builtin "github.com/conduitio/conduit/pkg/plugin/processor/builtin" @@ -485,9 +487,10 @@ func TestService_IntegrationTestServices(t *testing.T) { is.NoErr(err) }) + schemaService := connutils.NewSchemaService(logger, schemaregistry.NewSchemaRegistry()) connPluginService := conn_plugin.NewPluginService( logger, - builtin.NewRegistry(logger, builtin.DefaultDispenserFactories), + builtin.NewRegistry(logger, builtin.DefaultBuiltinConnectors, schemaService), standalone.NewRegistry(logger, ""), ) diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/client.go b/pkg/schemaregistry/client.go similarity index 51% rename from pkg/plugin/processor/builtin/impl/avro/schemaregistry/client.go rename to pkg/schemaregistry/client.go index 5e4036772..06ef7701d 100644 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/client.go +++ b/pkg/schemaregistry/client.go @@ -16,38 +16,63 @@ package schemaregistry import ( "context" + "fmt" + "github.com/conduitio/conduit-commons/rabin" "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/conduitio/conduit/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/twmb/franz-go/pkg/sr" + "github.com/twmb/go-cache/cache" ) // Client is a schema registry client that caches schemas. It is safe for -// concurrent use. +// concurrent use. The client caches schemas by ID, their subject/fingerprint and +// subject/version. Fingerprints are calculated using the Rabin algorithm. 
type Client struct { logger log.CtxLogger client sr.Client - cache internal.SchemaCache + idCache *cache.Cache[int, sr.Schema] + subjectFingerprintCache *cache.Cache[subjectFingerprint, sr.SubjectSchema] + subjectVersionCache *cache.Cache[subjectVersion, sr.SubjectSchema] } +type ( + subjectVersion string + subjectFingerprint string +) + +func newSubjectVersion(subject string, version int) subjectVersion { + return subjectVersion(fmt.Sprintf("%s:%d", subject, version)) +} + +func newSubjectFingerprint(subject string, text string) subjectFingerprint { + fingerprint := rabin.Bytes([]byte(text)) + return subjectFingerprint(fmt.Sprintf("%s:%d", subject, fingerprint)) +} + +var _ RegistryWithCheck = (*Client)(nil) + // NewClient creates a new client using the provided logger and schema registry // client options. -func NewClient(logger log.CtxLogger, opts ...sr.Opt) (*Client, error) { - defaultOpts := []sr.Opt{ +func NewClient(logger log.CtxLogger, opts ...sr.ClientOpt) (*Client, error) { + defaultOpts := []sr.ClientOpt{ sr.UserAgent("conduit"), sr.URLs(), // disable default URL } client, err := sr.NewClient(append(defaultOpts, opts...)...) if err != nil { - return nil, err + return nil, cerrors.Errorf("failed to create schema registry client: %w", err) } return &Client{ - logger: logger, + logger: logger.WithComponent("schemaregistry.Client"), client: *client, + + idCache: cache.New[int, sr.Schema](), + subjectFingerprintCache: cache.New[subjectFingerprint, sr.SubjectSchema](), + subjectVersionCache: cache.New[subjectVersion, sr.SubjectSchema](), }, nil } @@ -57,28 +82,38 @@ func NewClient(logger log.CtxLogger, opts ...sr.Opt) (*Client, error) { // was successful. 
func (c *Client) CreateSchema(ctx context.Context, subject string, schema sr.Schema) (sr.SubjectSchema, error) { logEvent := c.logger.Trace(ctx).Str("operation", "CreateSchema").Str("subject", subject) - ss, err := c.cache.GetBySubjectText(subject, schema.Schema, func() (sr.SubjectSchema, error) { + + sfp := newSubjectFingerprint(subject, schema.Schema) + ss, err, _ := c.subjectFingerprintCache.Get(sfp, func() (sr.SubjectSchema, error) { logEvent.Msg("schema cache miss") logEvent = nil // disable output for hit // Check if the subject exists. Ignore the error as this is not critical - // for creating a schema, we assume the subject exists in case of an error. - versions, _ := c.client.SubjectVersions(ctx, subject, sr.ShowDeleted) + // for creating a schema, we assume the subject doesn't exist in case of an error. + versions, _ := c.client.SubjectVersions(sr.WithParams(ctx, sr.ShowDeleted), subject) subjectExists := len(versions) > 0 ss, err := c.client.CreateSchema(ctx, subject, schema) if err != nil { - return ss, err + return ss, cerrors.Errorf("failed to create schema with subject %q: %w", subject, err) } if !subjectExists { - // if we are created the schema we need to disable compatibility checks - c.client.SetCompatibilityLevel(ctx, sr.CompatNone, subject) + // if we created the schema we need to disable compatibility checks + result := c.client.SetCompatibility(ctx, sr.SetCompatibility{Level: sr.CompatNone}, subject) + for _, res := range result { + if res.Err != nil { + // only log error, don't return it + c.logger.Warn(ctx).Err(res.Err).Str("subject", subject).Msg("failed to set compatibility to none, might create issues if an incompatible schema change happens in the future") + } + } } + c.idCache.Set(ss.ID, ss.Schema) + c.subjectVersionCache.Set(newSubjectVersion(ss.Subject, ss.Version), ss) return ss, nil }) if err != nil { - return sr.SubjectSchema{}, cerrors.Errorf("failed to create schema with subject %q: %w", subject, err) + return 
sr.SubjectSchema{}, err } logEvent.Msg("schema cache hit") return ss, nil @@ -91,13 +126,18 @@ func (c *Client) CreateSchema(ctx context.Context, subject string, schema sr.Sch // cache will not have an effect on methods that return a sr.SubjectSchema. func (c *Client) SchemaByID(ctx context.Context, id int) (sr.Schema, error) { logEvent := c.logger.Trace(ctx).Str("operation", "SchemaByID").Int("id", id) - s, err := c.cache.GetByID(id, func() (sr.Schema, error) { + + s, err, _ := c.idCache.Get(id, func() (sr.Schema, error) { logEvent.Msg("schema cache miss") logEvent = nil // disable output for hit - return c.client.SchemaByID(ctx, id) + ss, err := c.client.SchemaByID(ctx, id) + if err != nil { + return sr.Schema{}, cerrors.Errorf("failed to get schema with ID %q: %w", id, err) + } + return ss, nil }) if err != nil { - return sr.Schema{}, cerrors.Errorf("failed to get schema with ID %q: %w", id, err) + return sr.Schema{}, err } logEvent.Msg("schema cache hit") return s, nil @@ -110,14 +150,28 @@ func (c *Client) SchemaBySubjectVersion(ctx context.Context, subject string, ver // TODO handle latest version separately, let caller define timeout after // which the latest cached version should be downloaded again from upstream logEvent := c.logger.Trace(ctx).Str("operation", "SchemaBySubjectVersion").Str("subject", subject).Int("version", version) - ss, err := c.cache.GetBySubjectVersion(subject, version, func() (sr.SubjectSchema, error) { + + sv := newSubjectVersion(subject, version) + ss, err, _ := c.subjectVersionCache.Get(sv, func() (sr.SubjectSchema, error) { logEvent.Msg("schema cache miss") logEvent = nil // disable output for hit - return c.client.SchemaByVersion(ctx, subject, version, sr.HideDeleted) + ss, err := c.client.SchemaByVersion(ctx, subject, version) + if err != nil { + return ss, cerrors.Errorf("failed to get schema with subject %q and version %q: %w", subject, version, err) + } + c.idCache.Set(ss.ID, ss.Schema) + 
c.subjectFingerprintCache.Set(newSubjectFingerprint(ss.Subject, ss.Schema.Schema), ss) + return ss, nil }) if err != nil { - return sr.SubjectSchema{}, cerrors.Errorf("failed to get schema with subject %q and version %q: %w", subject, version, err) + return sr.SubjectSchema{}, err } logEvent.Msg("schema cache hit") return ss, nil } + +// Check checks if the schema registry is reachable. +func (c *Client) Check(ctx context.Context) error { + _, err := c.client.Subjects(ctx) // just check if we can list subjects + return err +} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/client_test.go b/pkg/schemaregistry/client_test.go similarity index 95% rename from pkg/plugin/processor/builtin/impl/avro/schemaregistry/client_test.go rename to pkg/schemaregistry/client_test.go index b5e407699..fb6dd879f 100644 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/client_test.go +++ b/pkg/schemaregistry/client_test.go @@ -23,8 +23,9 @@ import ( "github.com/conduitio/conduit/pkg/foundation/cerrors" "github.com/conduitio/conduit/pkg/foundation/log" - "github.com/lovromazgon/franz-go/pkg/sr" + "github.com/conduitio/conduit/pkg/schemaregistry/schemaregistrytest" "github.com/matryer/is" + "github.com/twmb/franz-go/pkg/sr" ) func TestClient_NotFound(t *testing.T) { @@ -36,7 +37,7 @@ func TestClient_NotFound(t *testing.T) { c, err := NewClient( logger, sr.HTTPClient(&http.Client{Transport: rtr}), - sr.URLs(TestSchemaRegistryURL(t)), + sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t)), ) is.NoErr(err) @@ -95,7 +96,7 @@ func TestClient_CacheMiss(t *testing.T) { // register schema in the schema registry but not in the client, to get a // cache miss but fetch from registry should return the schema - srClient, err := sr.NewClient(sr.URLs(TestSchemaRegistryURL(t))) + srClient, err := sr.NewClient(sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t))) is.NoErr(err) want, err := srClient.CreateSchema(ctx, "test-cache-miss", sr.Schema{ Schema: `"string"`, @@ -109,7 
+110,7 @@ func TestClient_CacheMiss(t *testing.T) { c, err := NewClient( logger, sr.HTTPClient(&http.Client{Transport: rtr}), - sr.URLs(TestSchemaRegistryURL(t)), + sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t)), ) is.NoErr(err) @@ -180,7 +181,7 @@ func TestClient_CacheHit(t *testing.T) { c, err := NewClient( logger, sr.HTTPClient(&http.Client{Transport: rtr}), - sr.URLs(TestSchemaRegistryURL(t)), + sr.URLs(schemaregistrytest.TestSchemaRegistryURL(t)), ) is.NoErr(err) @@ -217,7 +218,7 @@ func TestClient_CacheHit(t *testing.T) { ) rtr.AssertRecord(is, 4, assertMethod("PUT"), - assertRequestURI("/config/test-cache-hit?defaultToGlobal=true"), + assertRequestURI("/config/test-cache-hit"), assertResponseStatus(200), assertError(nil), ) diff --git a/pkg/schemaregistry/fromschema/sr.go b/pkg/schemaregistry/fromschema/sr.go new file mode 100644 index 000000000..fe136adfe --- /dev/null +++ b/pkg/schemaregistry/fromschema/sr.go @@ -0,0 +1,48 @@ +// Copyright © 2024 Meroxa, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fromschema + +import ( + "github.com/conduitio/conduit-commons/schema" + "github.com/twmb/franz-go/pkg/sr" +) + +func SrSubjectSchema(s schema.Schema) sr.SubjectSchema { + return sr.SubjectSchema{ + Subject: s.Subject, + Version: s.Version, + ID: s.ID, + Schema: SrSchema(s), + } +} + +func SrSchema(s schema.Schema) sr.Schema { + return sr.Schema{ + Schema: string(s.Bytes), + Type: SrSchemaType(s.Type), + References: nil, + SchemaMetadata: nil, + SchemaRuleSet: nil, + } +} + +func SrSchemaType(t schema.Type) sr.SchemaType { + switch t { + case schema.TypeAvro: + return sr.TypeAvro + default: + return sr.SchemaType(-1) // unknown + } +} diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin_test.go b/pkg/schemaregistry/registry.go similarity index 54% rename from pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin_test.go rename to pkg/schemaregistry/registry.go index 1f9545330..5bf4caf5d 100644 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/internal/rabin_test.go +++ b/pkg/schemaregistry/registry.go @@ -1,4 +1,4 @@ -// Copyright © 2023 Meroxa, Inc. +// Copyright © 2024 Meroxa, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,29 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package internal +package schemaregistry import ( - "testing" + "context" - "github.com/matryer/is" + "github.com/twmb/franz-go/pkg/sr" ) -func TestRabin(t *testing.T) { - testCases := []struct { - have string - want uint64 - }{ - {have: `"int"`, want: 0x7275d51a3f395c8f}, - {have: `"string"`, want: 0x8f014872634503c7}, - {have: `"bool"`, want: 0x4a1c6b80ca0bcf48}, - } +type Registry interface { + CreateSchema(ctx context.Context, subject string, schema sr.Schema) (sr.SubjectSchema, error) + SchemaByID(ctx context.Context, id int) (sr.Schema, error) + SchemaBySubjectVersion(ctx context.Context, subject string, version int) (sr.SubjectSchema, error) +} - for _, tc := range testCases { - t.Run(tc.have, func(t *testing.T) { - is := is.New(t) - got := Rabin([]byte(tc.have)) - is.Equal(tc.want, got) - }) - } +type RegistryWithCheck interface { + Registry + Check(ctx context.Context) error } diff --git a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/integration.go b/pkg/schemaregistry/schemaregistrytest/confluent.go similarity index 95% rename from pkg/plugin/processor/builtin/impl/avro/schemaregistry/integration.go rename to pkg/schemaregistry/schemaregistrytest/confluent.go index 774f4828e..558688a43 100644 --- a/pkg/plugin/processor/builtin/impl/avro/schemaregistry/integration.go +++ b/pkg/schemaregistry/schemaregistrytest/confluent.go @@ -1,4 +1,4 @@ -// Copyright © 2023 Meroxa, Inc. +// Copyright © 2024 Meroxa, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ //go:build integration -package schemaregistry +package schemaregistrytest import "testing" diff --git a/pkg/schemaregistry/schemaregistrytest/inmemory.go b/pkg/schemaregistry/schemaregistrytest/inmemory.go new file mode 100644 index 000000000..9a56df910 --- /dev/null +++ b/pkg/schemaregistry/schemaregistrytest/inmemory.go @@ -0,0 +1,93 @@ +// Copyright © 2024 Meroxa, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !integration + +package schemaregistrytest + +import ( + "fmt" + "io" + "log/slog" + "net" + "net/http" + "net/http/httptest" + "sync" + "testing" + + schemaregistry "github.com/conduitio/conduit-schema-registry" + "github.com/neilotoole/slogt" +) + +var ( + serverByTest = make(map[string]*httptest.Server) + serverByTestLock sync.Mutex +) + +// ExampleSchemaRegistryURL creates a fake in-memory schema registry server and +// returns its address and a cleanup function which should be executed in a +// deferred call. +// +// This method is only used if examples are run without --tags=integration. It +// is meant as a utility to allow faster iteration when developing, please run +// integration tests to ensure the code works with a real schema registry. +func ExampleSchemaRegistryURL(exampleName string, port int) (string, func()) { + // Discard all schema registry logs in examples. + // Related proposal: https://github.com/golang/go/issues/62005 + // Until the proposal goes through, we need to discard logs through the text handler. + logger := slog.New(slog.NewTextHandler(io.Discard, nil)) + return inMemorySchemaRegistryURL(exampleName, logger, port) +} + +// TestSchemaRegistryURL creates a fake in-memory schema registry server and +// returns its address. +// +// This method is only used if the tests are run without +// --tags=integration. 
It is meant as a utility to allow faster iteration when +// developing, please run integration tests to ensure the code works with a real +// schema registry. +func TestSchemaRegistryURL(t testing.TB) string { + url, cleanup := inMemorySchemaRegistryURL(t.Name(), slogt.New(t), 0) + t.Cleanup(cleanup) + return url +} + +func inMemorySchemaRegistryURL(name string, logger *slog.Logger, port int) (string, func()) { + serverByTestLock.Lock() + defer serverByTestLock.Unlock() + + srv := serverByTest[name] + cleanup := func() {} + if srv == nil { + mux := http.NewServeMux() + schemaSrv := schemaregistry.NewServer(logger, schemaregistry.NewSchemaRegistry()) + schemaSrv.RegisterHandlers(mux) + srv = httptest.NewUnstartedServer(mux) + if port > 0 { + // NewUnstartedServer creates a listener. Close that listener and replace + // with a custom one. + _ = srv.Listener.Close() + l, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port)) + if err != nil { + panic(fmt.Sprintf("failed starting test server on port %d: %v", port, err)) + } + srv.Listener = l + } + + srv.Start() + serverByTest[name] = srv + cleanup = srv.Close + } + return srv.URL, cleanup +} diff --git a/pkg/schemaregistry/toschema/sr.go b/pkg/schemaregistry/toschema/sr.go new file mode 100644 index 000000000..d117ae403 --- /dev/null +++ b/pkg/schemaregistry/toschema/sr.go @@ -0,0 +1,56 @@ +// Copyright © 2024 Meroxa, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package toschema + +import ( + "github.com/conduitio/conduit-commons/schema" + "github.com/twmb/franz-go/pkg/sr" +) + +func SrSubjectSchema(s sr.SubjectSchema) schema.Schema { + return schema.Schema{ + Subject: s.Subject, + Version: s.Version, + ID: s.ID, + Type: SrSchemaType(s.Schema.Type), + Bytes: []byte(s.Schema.Schema), + } +} + +// SrSchema only partially populates schema.Schema, as it doesn't contain +// information about the id, subject and version. The schema can still be used +// to marshal and unmarshal data. +func SrSchema(s sr.Schema) schema.Schema { + return schema.Schema{ + Subject: "", + Version: 0, + ID: 0, + Type: SrSchemaType(s.Type), + Bytes: []byte(s.Schema), + } +} + +func SrSchemaType(t sr.SchemaType) schema.Type { + switch t { + case sr.TypeAvro: + return schema.TypeAvro + case sr.TypeProtobuf: + return 0 // not supported yet + case sr.TypeJSON: + return 0 // not supported yet + default: + return 0 // unknown + } +}