From 5e0b59b4f540ccdd601d7aff7480f4011cfec65c Mon Sep 17 00:00:00 2001 From: Rahul Ravindran Date: Thu, 22 May 2014 15:24:27 -0700 Subject: [PATCH 1/4] Fix for ProtobufStructObjectInspector to work correctly with Hive --- .../hive/serde/ProtobufDeserializer.java | 6 +- .../serde/ProtobufStructObjectInspector.java | 316 ++++++++++-------- .../hive/serde/ProtobufDeserializerTest.java | 32 +- 3 files changed, 197 insertions(+), 157 deletions(-) diff --git a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializer.java b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializer.java index d1014fe3d..d2cd3f221 100644 --- a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializer.java +++ b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializer.java @@ -44,8 +44,10 @@ public void initialize(Configuration job, Properties tbl) throws SerDeException .asSubclass(Message.class); protobufConverter = ProtobufConverter.newInstance(protobufClass); + Message.Builder builder = Protobufs.getMessageBuilder(protobufClass); + Descriptor descriptor = Protobufs.getMessageDescriptor(protobufClass); - objectInspector = new ProtobufStructObjectInspector(descriptor); + objectInspector = new ProtobufStructObjectInspector(descriptor, builder); } catch (Exception e) { throw new SerDeException(e); } @@ -54,7 +56,7 @@ public void initialize(Configuration job, Properties tbl) throws SerDeException @Override public Object deserialize(Writable blob) throws SerDeException { BytesWritable bytes = (BytesWritable) blob; - return protobufConverter.fromBytes(bytes.getBytes(), 0, bytes.getLength()); + return protobufConverter.fromBytes(bytes.getBytes(), 0, bytes.getLength()).toBuilder(); } @Override diff --git a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java index 8f5977bab..dbc33718d 100644 --- a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java +++ b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java @@ -20,167 +20,201 @@ public final class ProtobufStructObjectInspector extends SettableStructObjectInspector { - public static class ProtobufStructField implements StructField { + public static class ProtobufStructField implements StructField { + + private ObjectInspector oi = null; + private String comment = null; + private FieldDescriptor fieldDescriptor = null; + private Descriptor descriptor = null; + private Message.Builder builder = null; + + @SuppressWarnings("unchecked") + public ProtobufStructField(FieldDescriptor fieldDescriptor) { + assert (fieldDescriptor != null); + this.fieldDescriptor = fieldDescriptor; + this.descriptor = null; + oi = this.createOIForField(); + } + + @SuppressWarnings("unchecked") + public ProtobufStructField(FieldDescriptor fd, Message.Builder builder) { + this.descriptor = fd.getMessageType(); + this.fieldDescriptor = fd; + this.builder = builder; + oi = this.createOIForField(); + } + + @Override + public String getFieldName() { + return fieldDescriptor.getName(); + } + + @Override + public ObjectInspector getFieldObjectInspector() { + return oi; + } + + @Override + public String getFieldComment() { + return comment; + } + + public FieldDescriptor getFieldDescriptor() { + return fieldDescriptor; + } - private ObjectInspector oi = null; - private String comment = null; - private FieldDescriptor fieldDescriptor; + private PrimitiveCategory getPrimitiveCategory(JavaType fieldType) { + switch (fieldType) { + case INT: + return PrimitiveCategory.INT; + case LONG: + return PrimitiveCategory.LONG; + case FLOAT: + return PrimitiveCategory.FLOAT; + case DOUBLE: + return PrimitiveCategory.DOUBLE; + case BOOLEAN: + return PrimitiveCategory.BOOLEAN; + case STRING: + return PrimitiveCategory.STRING; + case BYTE_STRING: + return PrimitiveCategory.BINARY; + case ENUM: + return PrimitiveCategory.STRING; + default: + return null; + } + } - @SuppressWarnings("unchecked") - public ProtobufStructField(FieldDescriptor fieldDescriptor) { - this.fieldDescriptor = fieldDescriptor; - oi = this.createOIForField(); + private ObjectInspector createOIForField() { + ObjectInspector elementOI = null; + if (descriptor != null) { + // this field is a Message + elementOI = new ProtobufStructObjectInspector(descriptor, builder); + } else { + JavaType fieldType = fieldDescriptor.getJavaType(); + PrimitiveCategory category = getPrimitiveCategory(fieldType); + if (category != null) { + elementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category); + } + } + if (fieldDescriptor.isRepeated()) { + return ObjectInspectorFactory.getStandardListObjectInspector(elementOI); + } else { + return elementOI; + } + } } - @Override - public String getFieldName() { - return fieldDescriptor.getName(); + private Descriptor descriptor; + private Message.Builder builder; + private List structFields = Lists.newArrayList(); + + ProtobufStructObjectInspector(Descriptor descriptor, Message.Builder builder) { + this.descriptor = descriptor; + this.builder = builder; + for (FieldDescriptor fd : descriptor.getFields()) { + if (fd.getType() != Type.MESSAGE) { + structFields.add(new ProtobufStructField(fd)); + } else { + structFields.add(new ProtobufStructField(fd, builder.newBuilderForField(fd))); + } + } } @Override - public ObjectInspector getFieldObjectInspector() { - return oi; + public Category getCategory() { + return Category.STRUCT; } @Override - public String getFieldComment() { - return comment; + public String getTypeName() { + StringBuilder sb = new StringBuilder("struct<"); + boolean first = true; + for (StructField structField : getAllStructFieldRefs()) { + if (first) { + first = false; + } else { + sb.append(","); + } + sb.append(structField.getFieldName()).append(":") + .append(structField.getFieldObjectInspector().getTypeName()); + } + sb.append(">"); + return sb.toString(); } - public FieldDescriptor getFieldDescriptor() { - return fieldDescriptor; + @Override + public Object create() { + return builder; } - private PrimitiveCategory getPrimitiveCategory(JavaType fieldType) { - switch (fieldType) { - case INT: - return PrimitiveCategory.INT; - case LONG: - return PrimitiveCategory.LONG; - case FLOAT: - return PrimitiveCategory.FLOAT; - case DOUBLE: - return PrimitiveCategory.DOUBLE; - case BOOLEAN: - return PrimitiveCategory.BOOLEAN; - case STRING: - return PrimitiveCategory.STRING; - case BYTE_STRING: - return PrimitiveCategory.BINARY; - case ENUM: - return PrimitiveCategory.STRING; - default: - return null; - } + @Override + public Object setStructFieldData(Object data, StructField field, Object fieldValue) { + Message.Builder builder = (Message.Builder) data; + ProtobufStructField psf = (ProtobufStructField) field; + FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); + if (fieldDescriptor.getType() != Type.MESSAGE) { + builder.setField(descriptor.findFieldByName(field.getFieldName()), fieldValue); + } else { + Message.Builder subFieldBuilder = (Message.Builder)fieldValue; + builder.setField(descriptor.findFieldByName(field.getFieldName()), subFieldBuilder.build()); + } + return builder; + } - private ObjectInspector createOIForField() { - JavaType fieldType = fieldDescriptor.getJavaType(); - PrimitiveCategory category = getPrimitiveCategory(fieldType); - ObjectInspector elementOI = null; - if (category != null) { - elementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category); - } else { - switch (fieldType) { - case MESSAGE: - elementOI = new ProtobufStructObjectInspector(fieldDescriptor.getMessageType()); - break; - default: - throw new RuntimeException("JavaType " + fieldType - + " from protobuf is not supported."); - } - } - if (fieldDescriptor.isRepeated()) { - return ObjectInspectorFactory.getStandardListObjectInspector(elementOI); - } else { - return elementOI; - } + @Override + public List getAllStructFieldRefs() { + return structFields; } - } - private Descriptor descriptor; - private List structFields = Lists.newArrayList(); + @Override + public Object getStructFieldData(Object data, StructField structField) { + if (data == null) { + return null; + } + Message.Builder m = ((Message.Builder) data); + ProtobufStructField psf = (ProtobufStructField) structField; + FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); - ProtobufStructObjectInspector(Descriptor descriptor) { - this.descriptor = descriptor; - for (FieldDescriptor fd : descriptor.getFields()) { - structFields.add(new ProtobufStructField(fd)); - } - } - - @Override - public Category getCategory() { - return Category.STRUCT; - } - - @Override - public String getTypeName() { - StringBuilder sb = new StringBuilder("struct<"); - boolean first = true; - for (StructField structField : getAllStructFieldRefs()) { - if (first) { - first = false; - } else { - sb.append(","); - } - sb.append(structField.getFieldName()).append(":") - .append(structField.getFieldObjectInspector().getTypeName()); - } - sb.append(">"); - return sb.toString(); - } - - @Override - public Object create() { - return descriptor.toProto().toBuilder().build(); - } - - @Override - public Object setStructFieldData(Object data, StructField field, Object fieldValue) { - return ((Message) data) - .toBuilder() - .setField(descriptor.findFieldByName(field.getFieldName()), fieldValue) - .build(); - } - - @Override - public List getAllStructFieldRefs() { - return structFields; - } - - @Override - public Object getStructFieldData(Object data, StructField structField) { - if (data == null) { - return null; - } - Message m = (Message) data; - ProtobufStructField psf = (ProtobufStructField) structField; - FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); - Object result = m.getField(fieldDescriptor); - if (fieldDescriptor.getType() == Type.ENUM) { - return ((EnumValueDescriptor)result).getName(); - } - if (fieldDescriptor.getType() == Type.BYTES && (result instanceof ByteString)) { - return ((ByteString)result).toByteArray(); + Object result = m.getField(fieldDescriptor); + if (fieldDescriptor.getType() == Type.ENUM) { + return ((EnumValueDescriptor) result).getName(); + } + if (fieldDescriptor.getType() == Type.BYTES && (result instanceof ByteString)) { + return ((ByteString) result).toByteArray(); + } + + if (fieldDescriptor.getType() == Type.MESSAGE && !fieldDescriptor.isRepeated()) { + result = ((Message) result).toBuilder(); + } + + return result; } - return result; - } - - @Override - public StructField getStructFieldRef(String fieldName) { - return new ProtobufStructField(descriptor.findFieldByName(fieldName)); - } - - @Override - public List getStructFieldsDataAsList(Object data) { - if (data == null) { - return null; + + @Override + public StructField getStructFieldRef(String fieldName) { + FieldDescriptor fd = descriptor.findFieldByName(fieldName); + ProtobufStructField result = null; + if (fd.getType() != Type.MESSAGE) { + result = new ProtobufStructField(fd); + } else { + result = new ProtobufStructField(fd, builder.newBuilderForField(fd)); + } + return result; } - List result = Lists.newArrayList(); - Message m = (Message) data; - for (FieldDescriptor fd : descriptor.getFields()) { - result.add(m.getField(fd)); + + @Override + public List getStructFieldsDataAsList(Object data) { + if (data == null) { + return null; + } + List result = Lists.newArrayList(); + Message.Builder m = (Message.Builder) data; + for (FieldDescriptor fd : descriptor.getFields()) { + result.add(m.getField(fd)); + } + return result; } - return result; - } } diff --git a/hive/src/test/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializerTest.java b/hive/src/test/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializerTest.java index 1b81d0f70..7b617c612 100644 --- a/hive/src/test/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializerTest.java +++ b/hive/src/test/java/com/twitter/elephantbird/hive/serde/ProtobufDeserializerTest.java @@ -30,8 +30,8 @@ public class ProtobufDeserializerTest { - private AddressBook test_ab; - private PhoneNumber test_pn; + private AddressBook.Builder test_ab; + private PhoneNumber.Builder test_pn; private ProtobufDeserializer deserializer; private ProtobufStructObjectInspector protobufOI; @@ -40,15 +40,14 @@ public void setUp() throws SerDeException { PhoneNumber pn1 = PhoneNumber.newBuilder().setNumber("pn0001").setType(PhoneType.HOME).build(); PhoneNumber pn2 = PhoneNumber.newBuilder().setNumber("pn0002").setType(PhoneType.WORK).build(); PhoneNumber pn3 = PhoneNumber.newBuilder().setNumber("pn0003").build(); - test_pn = PhoneNumber.newBuilder().setNumber("pn0004").setType(PhoneType.MOBILE) - .build(); + test_pn = PhoneNumber.newBuilder().setNumber("pn0004").setType(PhoneType.MOBILE); Person p1 = Person.newBuilder().setName("p1").setId(1).setEmail("p1@twitter").addPhone(pn1) .addPhone(pn2).addPhone(pn3).build(); Person p2 = Person.newBuilder().setName("p2").setId(2).addPhone(test_pn).build(); Person p3 = Person.newBuilder().setName("p3").setId(3).build(); - test_ab = AddressBook.newBuilder().addPerson(p1).addPerson(p2).addPerson(p3).setByteData(ByteString.copyFrom(new byte[] {16,32,64,(byte) 128})).build(); + test_ab = AddressBook.newBuilder().addPerson(p1).addPerson(p2).addPerson(p3).setByteData(ByteString.copyFrom(new byte[]{16, 32, 64, (byte) 128})); deserializer = new ProtobufDeserializer(); Properties properties = new Properties(); @@ -60,9 +59,9 @@ public void setUp() throws SerDeException { @Test public final void testDeserializer() throws SerDeException { - BytesWritable serialized = new BytesWritable(test_ab.toByteArray()); - AddressBook ab2 = (AddressBook) deserializer.deserialize(serialized); - assertTrue(test_ab.equals(ab2)); + BytesWritable serialized = new BytesWritable(test_ab.build().toByteArray()); + AddressBook.Builder ab2 = (AddressBook.Builder) deserializer.deserialize(serialized); + assertTrue(test_ab.build().equals(ab2.build())); } @Test @@ -84,7 +83,7 @@ public final void testObjectInspector() throws SerDeException { assertEquals(persons.get(0).getId(), 1); assertEquals(persons.get(1).getPhoneCount(), 1); - assertEquals(persons.get(1).getPhone(0), test_pn); + assertEquals(persons.get(1).getPhone(0), test_pn.build()); assertEquals(persons.get(1).getPhone(0).getType(), PhoneType.MOBILE); assertEquals(persons.get(2).getPhoneCount(), 0); @@ -98,11 +97,16 @@ public final void testObjectInspectorGetStructFieldData() throws SerDeException checkFields(PhoneNumber.getDescriptor().getFields(), test_pn); } - private void checkFields(List fields, Message message) { + private void checkFields(List fields, Message.Builder builder) { + ProtobufStructField psf = null; for (FieldDescriptor fieldDescriptor : fields) { - ProtobufStructField psf = new ProtobufStructField(fieldDescriptor); - Object data = protobufOI.getStructFieldData(message, psf); - Object target = message.getField(fieldDescriptor); + if (fieldDescriptor.getJavaType() != FieldDescriptor.JavaType.MESSAGE) { + psf = new ProtobufStructField(fieldDescriptor); + } else { + psf = new ProtobufStructField(fieldDescriptor, builder.newBuilderForField(fieldDescriptor)); + } + Object data = protobufOI.getStructFieldData(builder, psf); + Object target = builder.getField(fieldDescriptor); if (fieldDescriptor.getType() == Type.ENUM) { assertEquals(String.class, data.getClass()); assertEquals(data, ((EnumValueDescriptor) target).getName()); @@ -131,7 +135,7 @@ public final void testElementObjectInspector() throws SerDeException { .getObjectInspector(); ProtobufStructObjectInspector personOI = new ProtobufStructObjectInspector( - Person.getDescriptor()); + Person.getDescriptor(), Person.newBuilder()); assertEquals(protobufOI.getStructFieldRef("person").getFieldObjectInspector().getClass(), ObjectInspectorFactory.getStandardListObjectInspector(personOI).getClass()); From fbdb41b2015135b09c986b895ba813c0c260016d Mon Sep 17 00:00:00 2001 From: Rahul Ravindran Date: Thu, 22 May 2014 19:39:40 -0700 Subject: [PATCH 2/4] Fix indentation to use 2 spaces versus 4 --- .../serde/ProtobufStructObjectInspector.java | 339 +++++++++--------- 1 file changed, 169 insertions(+), 170 deletions(-) diff --git a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java index dbc33718d..050a9f08a 100644 --- a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java +++ b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java @@ -10,7 +10,6 @@ import com.google.protobuf.Descriptors.FieldDescriptor.Type; import com.google.protobuf.ByteString; import com.google.protobuf.Message; - import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -20,201 +19,201 @@ public final class ProtobufStructObjectInspector extends SettableStructObjectInspector { - public static class ProtobufStructField implements StructField { - - private ObjectInspector oi = null; - private String comment = null; - private FieldDescriptor fieldDescriptor = null; - private Descriptor descriptor = null; - private Message.Builder builder = null; - - @SuppressWarnings("unchecked") - public ProtobufStructField(FieldDescriptor fieldDescriptor) { - assert (fieldDescriptor != null); - this.fieldDescriptor = fieldDescriptor; - this.descriptor = null; - oi = this.createOIForField(); - } - - @SuppressWarnings("unchecked") - public ProtobufStructField(FieldDescriptor fd, Message.Builder builder) { - this.descriptor = fd.getMessageType(); - this.fieldDescriptor = fd; - this.builder = builder; - oi = this.createOIForField(); - } + public static class ProtobufStructField implements StructField { - @Override - public String getFieldName() { - return fieldDescriptor.getName(); - } - - @Override - public ObjectInspector getFieldObjectInspector() { - return oi; - } - - @Override - public String getFieldComment() { - return comment; - } + private ObjectInspector oi = null; + private String comment = null; + private FieldDescriptor fieldDescriptor = null; + private Descriptor descriptor = null; + private Message.Builder builder = null; - public FieldDescriptor getFieldDescriptor() { - return fieldDescriptor; - } - - private PrimitiveCategory getPrimitiveCategory(JavaType fieldType) { - switch (fieldType) { - case INT: - return PrimitiveCategory.INT; - case LONG: - return PrimitiveCategory.LONG; - case FLOAT: - return PrimitiveCategory.FLOAT; - case DOUBLE: - return PrimitiveCategory.DOUBLE; - case BOOLEAN: - return PrimitiveCategory.BOOLEAN; - case STRING: - return PrimitiveCategory.STRING; - case BYTE_STRING: - return PrimitiveCategory.BINARY; - case ENUM: - return PrimitiveCategory.STRING; - default: - return null; - } - } - - private ObjectInspector createOIForField() { - ObjectInspector elementOI = null; - if (descriptor != null) { - // this field is a Message - elementOI = new ProtobufStructObjectInspector(descriptor, builder); - } else { - JavaType fieldType = fieldDescriptor.getJavaType(); - PrimitiveCategory category = getPrimitiveCategory(fieldType); - if (category != null) { - elementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category); - } - } - if (fieldDescriptor.isRepeated()) { - return ObjectInspectorFactory.getStandardListObjectInspector(elementOI); - } else { - return elementOI; - } - } + @SuppressWarnings("unchecked") + public ProtobufStructField(FieldDescriptor fieldDescriptor) { + assert (fieldDescriptor != null); + this.fieldDescriptor = fieldDescriptor; + this.descriptor = null; + oi = this.createOIForField(); } - private Descriptor descriptor; - private Message.Builder builder; - private List structFields = Lists.newArrayList(); - - ProtobufStructObjectInspector(Descriptor descriptor, Message.Builder builder) { - this.descriptor = descriptor; - this.builder = builder; - for (FieldDescriptor fd : descriptor.getFields()) { - if (fd.getType() != Type.MESSAGE) { - structFields.add(new ProtobufStructField(fd)); - } else { - structFields.add(new ProtobufStructField(fd, builder.newBuilderForField(fd))); - } - } + @SuppressWarnings("unchecked") + public ProtobufStructField(FieldDescriptor fd, Message.Builder builder) { + this.descriptor = fd.getMessageType(); + this.fieldDescriptor = fd; + this.builder = builder; + oi = this.createOIForField(); } @Override - public Category getCategory() { - return Category.STRUCT; + public String getFieldName() { + return fieldDescriptor.getName(); } @Override - public String getTypeName() { - StringBuilder sb = new StringBuilder("struct<"); - boolean first = true; - for (StructField structField : getAllStructFieldRefs()) { - if (first) { - first = false; - } else { - sb.append(","); - } - sb.append(structField.getFieldName()).append(":") - .append(structField.getFieldObjectInspector().getTypeName()); - } - sb.append(">"); - return sb.toString(); + public ObjectInspector getFieldObjectInspector() { + return oi; } @Override - public Object create() { - return builder; + public String getFieldComment() { + return comment; } - @Override - public Object setStructFieldData(Object data, StructField field, Object fieldValue) { - Message.Builder builder = (Message.Builder) data; - ProtobufStructField psf = (ProtobufStructField) field; - FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); - if (fieldDescriptor.getType() != Type.MESSAGE) { - builder.setField(descriptor.findFieldByName(field.getFieldName()), fieldValue); - } else { - Message.Builder subFieldBuilder = (Message.Builder)fieldValue; - builder.setField(descriptor.findFieldByName(field.getFieldName()), subFieldBuilder.build()); - } - return builder; + public FieldDescriptor getFieldDescriptor() { + return fieldDescriptor; + } + private PrimitiveCategory getPrimitiveCategory(JavaType fieldType) { + switch (fieldType) { + case INT: + return PrimitiveCategory.INT; + case LONG: + return PrimitiveCategory.LONG; + case FLOAT: + return PrimitiveCategory.FLOAT; + case DOUBLE: + return PrimitiveCategory.DOUBLE; + case BOOLEAN: + return PrimitiveCategory.BOOLEAN; + case STRING: + return PrimitiveCategory.STRING; + case BYTE_STRING: + return PrimitiveCategory.BINARY; + case ENUM: + return PrimitiveCategory.STRING; + default: + return null; + } } - @Override - public List getAllStructFieldRefs() { - return structFields; + private ObjectInspector createOIForField() { + ObjectInspector elementOI = null; + if (descriptor != null) { + // this field is a Message + elementOI = new ProtobufStructObjectInspector(descriptor, builder); + } else { + JavaType fieldType = fieldDescriptor.getJavaType(); + PrimitiveCategory category = getPrimitiveCategory(fieldType); + if (category != null) { + elementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(category); + } + } + if (fieldDescriptor.isRepeated()) { + return ObjectInspectorFactory.getStandardListObjectInspector(elementOI); + } else { + return elementOI; + } + } + } + + private Descriptor descriptor; + private Message.Builder builder; + private List structFields = Lists.newArrayList(); + + ProtobufStructObjectInspector(Descriptor descriptor, Message.Builder builder) { + this.descriptor = descriptor; + this.builder = builder; + for (FieldDescriptor fd : descriptor.getFields()) { + if (fd.getType() != Type.MESSAGE) { + structFields.add(new ProtobufStructField(fd)); + } else { + structFields.add(new ProtobufStructField(fd, builder.newBuilderForField(fd))); + } + } + } + + @Override + public Category getCategory() { + return Category.STRUCT; + } + + @Override + public String getTypeName() { + StringBuilder sb = new StringBuilder("struct<"); + boolean first = true; + for (StructField structField : getAllStructFieldRefs()) { + if (first) { + first = false; + } else { + sb.append(","); + } + sb.append(structField.getFieldName()).append(":") + .append(structField.getFieldObjectInspector().getTypeName()); + } + sb.append(">"); + return sb.toString(); + } + + @Override + public Object create() { + return builder; + } + + @Override + public Object setStructFieldData(Object data, StructField field, Object fieldValue) { + Message.Builder builder = (Message.Builder) data; + ProtobufStructField psf = (ProtobufStructField) field; + FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); + if (fieldDescriptor.getType() != Type.MESSAGE) { + builder.setField(descriptor.findFieldByName(field.getFieldName()), fieldValue); + } else { + Message.Builder subFieldBuilder = (Message.Builder) fieldValue; + builder.setField(descriptor.findFieldByName(field.getFieldName()), subFieldBuilder.build()); } + return builder; - @Override - public Object getStructFieldData(Object data, StructField structField) { - if (data == null) { - return null; - } - Message.Builder m = ((Message.Builder) data); - ProtobufStructField psf = (ProtobufStructField) structField; - FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); + } - Object result = m.getField(fieldDescriptor); - if (fieldDescriptor.getType() == Type.ENUM) { - return ((EnumValueDescriptor) result).getName(); - } - if (fieldDescriptor.getType() == Type.BYTES && (result instanceof ByteString)) { - return ((ByteString) result).toByteArray(); - } + @Override + public List getAllStructFieldRefs() { + return structFields; + } - if (fieldDescriptor.getType() == Type.MESSAGE && !fieldDescriptor.isRepeated()) { - result = ((Message) result).toBuilder(); - } + @Override + public Object getStructFieldData(Object data, StructField structField) { + if (data == null) { + return null; + } + Message.Builder m = ((Message.Builder) data); + ProtobufStructField psf = (ProtobufStructField) structField; + FieldDescriptor fieldDescriptor = psf.getFieldDescriptor(); - return result; + Object result = m.getField(fieldDescriptor); + if (fieldDescriptor.getType() == Type.ENUM) { + return ((EnumValueDescriptor) result).getName(); + } + if (fieldDescriptor.getType() == Type.BYTES && (result instanceof ByteString)) { + return ((ByteString) result).toByteArray(); } - @Override - public StructField getStructFieldRef(String fieldName) { - FieldDescriptor fd = descriptor.findFieldByName(fieldName); - ProtobufStructField result = null; - if (fd.getType() != Type.MESSAGE) { - result = new ProtobufStructField(fd); - } else { - result = new ProtobufStructField(fd, builder.newBuilderForField(fd)); - } - return result; + if (fieldDescriptor.getType() == Type.MESSAGE && !fieldDescriptor.isRepeated()) { + result = ((Message) result).toBuilder(); } - @Override - public List getStructFieldsDataAsList(Object data) { - if (data == null) { - return null; - } - List result = Lists.newArrayList(); - Message.Builder m = (Message.Builder) data; - for (FieldDescriptor fd : descriptor.getFields()) { - result.add(m.getField(fd)); - } - return result; + return result; + } + + @Override + public StructField getStructFieldRef(String fieldName) { + FieldDescriptor fd = descriptor.findFieldByName(fieldName); + ProtobufStructField result = null; + if (fd.getType() != Type.MESSAGE) { + result = new ProtobufStructField(fd); + } else { + result = new ProtobufStructField(fd, builder.newBuilderForField(fd)); + } + return result; + } + + @Override + public List getStructFieldsDataAsList(Object data) { + if (data == null) { + return null; + } + List result = Lists.newArrayList(); + Message.Builder m = (Message.Builder) data; + for (FieldDescriptor fd : descriptor.getFields()) { + result.add(m.getField(fd)); } + return result; + } } From ad4c69430e38545245d5a204d1d884f339dcf8de Mon Sep 17 00:00:00 2001 From: Rahul Ravindran Date: Thu, 22 May 2014 19:41:45 -0700 Subject: [PATCH 3/4] add back extra line break --- .../elephantbird/hive/serde/ProtobufStructObjectInspector.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java index 050a9f08a..6cdf94303 100644 --- a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java +++ b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java @@ -10,6 +10,7 @@ import com.google.protobuf.Descriptors.FieldDescriptor.Type; import com.google.protobuf.ByteString; import com.google.protobuf.Message; + import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; From baa55add49a2ac8ebd42993fb48af81adf67470a Mon Sep 17 00:00:00 2001 From: Rahul Ravindran Date: Thu, 22 May 2014 19:42:59 -0700 Subject: [PATCH 4/4] remove assert --- .../elephantbird/hive/serde/ProtobufStructObjectInspector.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java index 6cdf94303..50d86a27a 100644 --- a/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java +++ b/hive/src/main/java/com/twitter/elephantbird/hive/serde/ProtobufStructObjectInspector.java @@ -30,7 +30,6 @@ public static class ProtobufStructField implements StructField { @SuppressWarnings("unchecked") public ProtobufStructField(FieldDescriptor fieldDescriptor) { - assert (fieldDescriptor != null); this.fieldDescriptor = fieldDescriptor; this.descriptor = null; oi = this.createOIForField();