From 0da7a813e1ee5927b0e67dd6b2bd6b2e08c790e5 Mon Sep 17 00:00:00 2001 From: xiangying <1984997880@qq.com> Date: Thu, 27 Jul 2023 14:25:33 +0800 Subject: [PATCH 1/2] [optimize][schema] Add schema compatibility between Go and Java client --- docs/client-libraries-schema.md | 94 ++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/docs/client-libraries-schema.md b/docs/client-libraries-schema.md index bf080e5f7b7c..246d22b78eac 100644 --- a/docs/client-libraries-schema.md +++ b/docs/client-libraries-schema.md @@ -142,4 +142,96 @@ while True: except Exception: # Message failed to be processed consumer.negative_acknowledge(msg) -``` \ No newline at end of file +``` + +## Work with the Go schema +Working with Go schema has slight differences from Java schema. +This part will introduce the schema compatibility between Go client and Java client. + +#### Avro Schema +Avro schema in Go and Java are compatible, but there are some differences in how the schemas are defined. Go typically uses schema definitions, a string JSON, to create schemas. However, Java often uses class types for schema creation. As a result, Java allows non-primitive fields to be nullable by default, while in Go's schema definition, the nullability of fields needs to be explicitly stated. 
+GO: +```go +// Compatible with defining a schema in Java +exampleSchemaDefCompatible := NewAvroSchema(`{"fields": + [ + {"name":"id","type":"int"},{"default":null,"name":"name","type":["null","string"]} + ], + "name":"MyAvro","namespace":"schemaNotFoundTestCase","type":"record"}`, nil) +// Not compatible with defining a schema in Java +exampleSchemaDefIncompatible := NewAvroSchema(`{"fields": + [ + {"name":"id","type":"int"},{"default":null,"name":"name","type":["string"]} + ], + "name":"MyAvro","namespace":"schemaNotFoundTestCase","type":"record"}`, nil) +Producer := NewAvroSchema(exampleSchemaDef, nil) + +``` +JAVA: +```java +@AllArgsConstructor +@NoArgsConstructor +public static class Example { + public String name; + public int id; +} + +Producer producer = pulsarClient.newProducer(Schema.AVRO(Example.class)) + .topic(topic).create(); +``` + +#### JSON Schema +The situation with JSON schema is similar to Avro Schema. +```go +// Compatible with defining a schema in Java +exampleSchemaDefCompatible := "{\"type\":\"record\",\"name\":\"Example\",\"namespace\":\"test\"," + + "\"fields\":[{\"name\":\"ID\",\"type\":\"int\"},{\"name\":\"Name\",\"type\":[\"null\", \"string\"]}]}" + +consumerJSCompatible := NewJSONSchema(exampleSchemaDefCompatible, nil) +// Not compatible with defining a schema in Java +exampleSchemaDefIncompatible := "{\"type\":\"record\",\"name\":\"Example\",\"namespace\":\"test\"," + + "\"fields\":[{\"name\":\"ID\",\"type\":\"int\"},{\"name\":\"Name\",\"type\":\"string\"}]}" + +consumerJSIncompatible := NewJSONSchema(exampleSchemaDefIncompatible, nil) +``` + +To achieve compatibility, modify the `exampleSchemaDefIncompatible` to allow null fields and ensure that the variable names in the Java Example class match the schema definition. + +#### Proto Schema +Proto and ProtoNative schemas exhibit some incompatibility between Go and Java clients. This is because Avro Proto currently does not provide full compatibility between Java and Go. 
+ +```proto +message TestMessage { + string stringField = 1; + int32 intField = 2; +} +``` + +In Java, the schema definition can be derived automatically from a class. +```json +protoSchemaDef = "{\"type\":\"record\",\"name\":\"TestMessage\",\"namespace\":\"org.apache.pulsar.client.api.schema.proto.Test\",\"fields\":[{\"name\":\"stringField\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":\"\"},{\"name\":\"intField\",\"type\":\"int\",\"default\":0}]}" + +``` + +In Go, the schema definition must be written manually. +```json +protoSchemaDef = "{\"type\":\"record\",\"name\":\"Example\",\"namespace\":\"test\"," + + "\"fields\":[{\"name\":\"num\",\"type\":\"int\"},{\"name\":\"msf\",\"type\":\"string\"}]}" +``` +To address the incompatibility between Proto and ProtoNative types, you can follow this approach: +1. In the Java client, parse the message using the Avro Proto library to obtain the schema definition. +2. Use the schema definition obtained in step 1 in the Go client so that both clients use the same schema definition. +```json +protoSchemaDef = "{\"type\":\"record\",\"name\":\"TestMessage\",\"namespace\":\"org.apache.pulsar.client.api.schema.proto.Test\",\"fields\":[{\"name\":\"stringField\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":\"\"},{\"name\":\"intField\",\"type\":\"int\",\"default\":0}]}" + +``` +3. Modify the Go Proto Message by adding compatibility extensions. For example, add the `[(avro_java_string) = "String"]` extension to string type fields. +```proto +message TestMessage { + string stringField = 1 [(avro_java_string) = "String"]; + int32 intField = 2; +} +``` + +#### ProtoNative Schema +Similar to the Proto schema, ProtoNative schemas are also incompatible between Java and Go clients. To address this, you can use a unified schema define and add `[(avro_java_string) = "String"]` extension to the Go client's Proto message. 
From 61cfcb27a62b5de3ae0110cf9d346c18520ff488 Mon Sep 17 00:00:00 2001 From: xiangying <1984997880@qq.com> Date: Tue, 8 Aug 2023 01:30:01 +0800 Subject: [PATCH 2/2] optimize --- docs/client-libraries-schema.md | 63 +++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/docs/client-libraries-schema.md b/docs/client-libraries-schema.md index 246d22b78eac..27d146316ef6 100644 --- a/docs/client-libraries-schema.md +++ b/docs/client-libraries-schema.md @@ -3,7 +3,10 @@ id: client-libraries-schema title: Work with schema sidebar_label: "Work with schema" --- - +````mdx-code-block +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +```` ## Get started with schema @@ -148,9 +151,16 @@ while True: Working with Go schema has slight differences from Java schema. This part will introduce the schema compatibility between Go client and Java client. -#### Avro Schema -Avro schema in Go and Java are compatible, but there are some differences in how the schemas are defined. Go typically uses schema definitions, a string JSON, to create schemas. However, Java often uses class types for schema creation. As a result, Java allows non-primitive fields to be nullable by default, while in Go's schema definition, the nullability of fields needs to be explicitly stated. -GO: +### Avro/JSON Schema +AVRO and JSON schema in Go and Java are compatible, but there are some differences in how the schemas are defined. +````mdx-code-block + + + +Go typically uses schema definitions, a string JSON, to create schemas. However, Java often uses class types for schema creation. As a result, Java allows non-primitive fields to be nullable by default, while in Go's schema definition, the nullability of fields needs to be explicitly stated. 
+ ```go // Compatible with defining a schema in Java exampleSchemaDefCompatible := NewAvroSchema(`{"fields": @@ -167,7 +177,6 @@ exampleSchemaDefIncompatible := NewAvroSchema(`{"fields": Producer := NewAvroSchema(exampleSchemaDef, nil) ``` -JAVA: ```java @AllArgsConstructor @NoArgsConstructor @@ -179,9 +188,19 @@ public static class Example { Producer producer = pulsarClient.newProducer(Schema.AVRO(Example.class)) .topic(topic).create(); ``` +Another way to maintain compatibility is to use a schema definition to create the schema in the Java client as well. +```java + SchemaDefinition schemaDefinition = + SchemaDefinition.builder().withPojo(Example.class).withAlwaysAllowNull(false).build(); + Schema schema = Schema.AVRO(schemaDefinition); + + Producer producer = pulsarClient.newProducer(schema) + .topic(topic).create(); +``` + + +Go typically uses schema definitions, a string JSON, to create schemas. However, Java often uses class types for schema creation. As a result, Java allows non-primitive fields to be nullable by default, while in Go's schema definition, the nullability of fields needs to be explicitly stated. -#### JSON Schema -The situation with JSON schema is similar to Avro Schema. ```go // Compatible with defining a schema in Java exampleSchemaDefCompatible := "{\"type\":\"record\",\"name\":\"Example\",\"namespace\":\"test\"," + @@ -195,9 +214,33 @@ exampleSchemaDefIncompatible := "{\"type\":\"record\",\"name\":\"Example\",\"nam consumerJSIncompatible := NewJSONSchema(exampleSchemaDefIncompatible, nil) ``` -To achieve compatibility, modify the `exampleSchemaDefIncompatible` to allow null fields and ensure that the variable names in the Java Example class match the schema definition. 
+```java +@AllArgsConstructor +@NoArgsConstructor +public static class Example { + public String name; + public int id; +} + +Producer producer = pulsarClient.newProducer(Schema.AVRO(Example.class)) + .topic(topic).create(); +``` + +Another way to maintain compatibility is to use a schema definition to create the schema in the Java client as well. +```java + SchemaDefinition schemaDefinition = + SchemaDefinition.builder().withPojo(Example.class).withAlwaysAllowNull(false).build(); + Schema schema = Schema.AVRO(schemaDefinition); + + Producer producer = pulsarClient.newProducer(schema) + .topic(topic).create(); +``` + + + +```` -#### Proto Schema +### Proto Schema Proto and ProtoNative schemas exhibit some incompatibility between Go and Java clients. This is because Avro Proto currently does not provide full compatibility between Java and Go. ```proto @@ -233,5 +276,5 @@ message TestMessage { } ``` -#### ProtoNative Schema +### ProtoNative Schema Similar to the Proto schema, ProtoNative schemas are also incompatible between Java and Go clients. To address this, you can use a unified schema define and add `[(avro_java_string) = "String"]` extension to the Go client's Proto message.