From d21a4bc48079c51d92cd3db219024ce8c5c93f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alfred=20Gunnarg=C3=A5rd?= Date: Fri, 5 Nov 2021 15:48:13 +0100 Subject: [PATCH] feat: add protoc plugin for generating json BigQuery schemas --- encoding/jsonbq/marshal.go | 22 ++++++++++++++++-- encoding/protobq/schema.go | 8 +++---- go.mod | 1 + go.sum | 1 + protoc-gen-bq-json-schema/genjson/config.go | 16 +++++++++++++ protoc-gen-bq-json-schema/genjson/message.go | 24 ++++++++++++++++++++ protoc-gen-bq-json-schema/genjson/run.go | 19 ++++++++++++++++ protoc-gen-bq-json-schema/main.go | 16 +++++++++++++ 8 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 protoc-gen-bq-json-schema/genjson/config.go create mode 100644 protoc-gen-bq-json-schema/genjson/message.go create mode 100644 protoc-gen-bq-json-schema/genjson/run.go create mode 100644 protoc-gen-bq-json-schema/main.go diff --git a/encoding/jsonbq/marshal.go b/encoding/jsonbq/marshal.go index cf4be7d..9e5ad24 100644 --- a/encoding/jsonbq/marshal.go +++ b/encoding/jsonbq/marshal.go @@ -22,11 +22,29 @@ type Field struct { Fields []*Field `json:"fields,omitempty"` } -// MarshalSchema marshals a BigQuery schema to a valid BigQuery JSON schema. -// The JSON conforms to the format in this example: +// MarshalSchema marshals a BigQuery schema to a valid BigQuery JSON schema based on +// default MarshalOptions. The JSON conforms to the format in this example: // https://cloud.google.com/bigquery/docs/nested-repeated#example_schema func MarshalSchema(schema bigquery.Schema) ([]byte, error) { + return MarshalOptions{}.MarshalSchema(schema) +} + +// MarshalOptions is a configurable BigQuery schema JSON marshaler. +type MarshalOptions struct { + // Indent is the indentation string handed to json.MarshalIndent to produce formatted + // output; json.MarshalIndent repeats it once per nesting level at the start of each line. + // Leave Indent empty to get the compact output of json.Marshal instead.
+ Indent string +} + +// MarshalSchema marshals a BigQuery schema to a valid BigQuery JSON schema based on +// the given MarshalOptions. The JSON conforms to the format in this example: +// https://cloud.google.com/bigquery/docs/nested-repeated#example_schema +func (o MarshalOptions) MarshalSchema(schema bigquery.Schema) ([]byte, error) { fields := convertSchema(schema) + if o.Indent != "" { + return json.MarshalIndent(fields, "", o.Indent) + } return json.Marshal(fields) } diff --git a/encoding/protobq/schema.go b/encoding/protobq/schema.go index ab1bb8b..28884a0 100644 --- a/encoding/protobq/schema.go +++ b/encoding/protobq/schema.go @@ -29,11 +29,11 @@ type SchemaOptions struct { // InferSchema infers a BigQuery schema for the given proto.Message using options in // MarshalOptions. func (o SchemaOptions) InferSchema(msg proto.Message) bigquery.Schema { - return o.inferMessageSchema(msg.ProtoReflect().Descriptor()) + return o.InferMessageSchema(msg.ProtoReflect().Descriptor()) } -// inferMessageSchema infers the BigQuery schema for the given protoreflect.MessageDescriptor. -func (o SchemaOptions) inferMessageSchema(msg protoreflect.MessageDescriptor) bigquery.Schema { +// InferMessageSchema infers the BigQuery schema for the given protoreflect.MessageDescriptor.
+func (o SchemaOptions) InferMessageSchema(msg protoreflect.MessageDescriptor) bigquery.Schema { schema := make(bigquery.Schema, 0, msg.Fields().Len()) for i := 0; i < msg.Fields().Len(); i++ { fieldSchema := o.inferFieldSchema(msg.Fields().Get(i)) @@ -64,7 +64,7 @@ func (o SchemaOptions) inferFieldSchema(field protoreflect.FieldDescriptor) *big Repeated: field.IsList(), } if fieldSchema.Type == bigquery.RecordFieldType && fieldSchema.Schema == nil { - fieldSchema.Schema = o.inferMessageSchema(field.Message()) + fieldSchema.Schema = o.InferMessageSchema(field.Message()) if len(fieldSchema.Schema) == 0 { return nil } diff --git a/go.mod b/go.mod index 7fd4b6a..c3ffcc5 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( cloud.google.com/go/bigquery v1.22.0 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/go-cmp v0.5.6 + github.com/spf13/pflag v1.0.3 golang.org/x/net v0.0.0-20210825183410-e898025ed96a // indirect golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f golang.org/x/text v0.3.7 // indirect diff --git a/go.sum b/go.sum index 1620279..ccbc651 100644 --- a/go.sum +++ b/go.sum @@ -173,6 +173,7 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= diff --git a/protoc-gen-bq-json-schema/genjson/config.go 
b/protoc-gen-bq-json-schema/genjson/config.go
new file mode 100644
index 0000000..ee1e380
--- /dev/null
+++ b/protoc-gen-bq-json-schema/genjson/config.go
@@ -0,0 +1,20 @@
+package genjson
+
+import "github.com/spf13/pflag"
+
+// Config holds the options of the JSON schema generator.
+type Config struct {
+	// Format selects indented JSON output when true.
+	Format bool
+}
+
+// AddToFlagSet registers the Config fields as flags on flags; currently this is
+// the boolean "format" flag, which defaults to false.
+func (c *Config) AddToFlagSet(flags *pflag.FlagSet) {
+	flags.BoolVar(
+		&c.Format,
+		"format",
+		false,
+		"Set to true to get a formatted json output.",
+	)
+}
diff --git a/protoc-gen-bq-json-schema/genjson/message.go b/protoc-gen-bq-json-schema/genjson/message.go
new file mode 100644
index 0000000..266f943
--- /dev/null
+++ b/protoc-gen-bq-json-schema/genjson/message.go
@@ -0,0 +1,30 @@
+package genjson
+
+import (
+	"fmt"
+
+	"go.einride.tech/protobuf-bigquery/encoding/jsonbq"
+	"go.einride.tech/protobuf-bigquery/encoding/protobq"
+	"google.golang.org/protobuf/compiler/protogen"
+)
+
+// GenerateSchemaFile infers the BigQuery schema of msg and writes it as JSON to a
+// generated file named "<msg.Desc.Name()>.json". The JSON is indented when
+// config.Format is set and compact otherwise. A marshalling failure is returned
+// wrapped with the full name of the message.
+// NOTE(review): the file name is built from msg.Desc.Name() only, so messages
+// sharing a name presumably end up in the same output file; confirm.
+func GenerateSchemaFile(gen *protogen.Plugin, msg *protogen.Message, config Config) error {
+	g := gen.NewGeneratedFile(fmt.Sprintf("%s.json", msg.Desc.Name()), "")
+	schema := protobq.SchemaOptions{}.InferMessageSchema(msg.Desc)
+	jsonOpt := jsonbq.MarshalOptions{}
+	if config.Format {
+		jsonOpt.Indent = " "
+	}
+	out, err := jsonOpt.MarshalSchema(schema)
+	if err != nil {
+		return fmt.Errorf("marshal schema for %s: %w", msg.Desc.FullName(), err)
+	}
+	g.P(string(out))
+	return nil
+}
diff --git a/protoc-gen-bq-json-schema/genjson/run.go b/protoc-gen-bq-json-schema/genjson/run.go
new file mode 100644
index 0000000..64443be
--- /dev/null
+++ b/protoc-gen-bq-json-schema/genjson/run.go
@@ -0,0 +1,22 @@
+package genjson
+
+import (
+	"google.golang.org/protobuf/compiler/protogen"
+)
+
+// Run calls GenerateSchemaFile for every message listed in f.Messages of each
+// file in gen.Files whose Generate flag is set. It returns the first error
+// encountered, or nil when every call succeeded.
+func Run(gen *protogen.Plugin, config Config) error {
+	for _, f := range gen.Files {
+		if !f.Generate {
+			continue
+		}
+		for _, msg := range f.Messages {
+			if err := GenerateSchemaFile(gen, msg, config); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
diff --git a/protoc-gen-bq-json-schema/main.go b/protoc-gen-bq-json-schema/main.go
new file mode 100644
index 0000000..0fb231d
--- /dev/null
+++
b/protoc-gen-bq-json-schema/main.go
@@ -0,0 +1,19 @@
+package main
+
+import (
+	"github.com/spf13/pflag"
+	"go.einride.tech/protobuf-bigquery/protoc-gen-bq-json-schema/genjson"
+	"google.golang.org/protobuf/compiler/protogen"
+)
+
+// main registers the generator's flags and runs the plugin through protogen,
+// handing it flags.Set as ParamFunc so plugin parameters are stored in config.
+func main() {
+	var config genjson.Config
+	flags := pflag.NewFlagSet("protoc-gen-bq-json-schema", pflag.ContinueOnError)
+	config.AddToFlagSet(flags)
+	opts := protogen.Options{ParamFunc: flags.Set}
+	opts.Run(func(plugin *protogen.Plugin) error {
+		return genjson.Run(plugin, config)
+	})
+}