Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into apacheGH-3035-parqu…
Browse files Browse the repository at this point in the history
…et-rewriter-add-column-renaming-feature
  • Loading branch information
maxim_konstantinov committed Nov 9, 2024
2 parents a4eccc6 + 4fb7518 commit 1565d73
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 19 deletions.
22 changes: 11 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ Parquet-Java uses Maven to build and depends on the thrift compiler (protoc is n
To build and install the thrift compiler, run:

```
wget -nv https://archive.apache.org/dist/thrift/0.20.0/thrift-0.20.0.tar.gz
tar xzf thrift-0.20.0.tar.gz
cd thrift-0.20.0
wget -nv https://archive.apache.org/dist/thrift/0.21.0/thrift-0.21.0.tar.gz
tar xzf thrift-0.21.0.tar.gz
cd thrift-0.21.0
chmod +x ./configure
./configure --disable-libs
sudo make install -j
```

If you're on OSX and use homebrew, you can instead install Thrift 0.20.0 with `brew` and ensure that it comes first in your `PATH`.
If you're on macOS and use Homebrew, you can instead install Thrift 0.21.0 with `brew` and ensure that it comes first in your `PATH`.

```
brew install thrift
export PATH="/usr/local/opt/thrift@0.20.0/bin:$PATH"
export PATH="/usr/local/opt/thrift@0.21.0/bin:$PATH"
```

### Build Parquet with Maven
Expand Down Expand Up @@ -125,7 +125,7 @@ Protobuf conversion is implemented via the [parquet-protobuf](https://github.com

### Create your own objects
* The ParquetOutputFormat can be provided a WriteSupport to write your own objects to an event based RecordConsumer.
* the ParquetInputFormat can be provided a ReadSupport to materialize your own objects by implementing a RecordMaterializer
* The ParquetInputFormat can be provided a ReadSupport to materialize your own objects by implementing a RecordMaterializer.

See the APIs:
* [Record conversion API](https://github.com/apache/parquet-java/tree/master/parquet-column/src/main/java/org/apache/parquet/io/api)
Expand Down Expand Up @@ -166,29 +166,29 @@ The build runs in [GitHub Actions](https://github.com/apache/parquet-java/action

## Add Parquet as a dependency in Maven

The current release is version `1.14.1`
The current release is version `1.14.3`.

```xml
<dependencies>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-common</artifactId>
<version>1.14.1</version>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-encoding</artifactId>
<version>1.14.1</version>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>1.14.1</version>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>1.14.1</version>
<version>1.14.3</version>
</dependency>
</dependencies>
```
Expand Down
2 changes: 1 addition & 1 deletion dev/ci-before_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# This script gets invoked by the CI system in a "before install" step
################################################################################

export THRIFT_VERSION=0.20.0
export THRIFT_VERSION=0.21.0

set -e
date
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -376,10 +378,18 @@ void writeInto(ByteBuffer buffer) {
ByteBuffer workBuf = buffer.duplicate();
int pos = buffer.position();
workBuf.limit(pos + byteCount);
Channels.newChannel(in).read(workBuf);
ReadableByteChannel channel = Channels.newChannel(in);
int remaining = byteCount;
while (remaining > 0) {
int bytesRead = channel.read(workBuf);
if (bytesRead < 0) {
throw new EOFException("Reached the end of stream with " + remaining + " bytes left to read");
}
remaining -= bytesRead;
}
buffer.position(pos + byteCount);
} catch (IOException e) {
new RuntimeException("Exception occurred during reading input stream", e);
throw new RuntimeException("Exception occurred during reading input stream", e);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.parquet.bytes;

import java.io.ByteArrayInputStream;

/**
 * A {@link ByteArrayInputStream} whose {@link #available()} always reports {@code 0},
 * no matter how many bytes are still unread.
 *
 * <p>It imitates input stream implementations that, in practice, always return 0 from
 * {@code available()} even though they have more data to deliver.
 */
public class AvailableAgnosticInputStream extends ByteArrayInputStream {

  /**
   * Creates a stream that reads from the given byte array.
   *
   * @param content the bytes served by this stream
   */
  public AvailableAgnosticInputStream(byte[] content) {
    super(content);
  }

  /**
   * Always reports that no bytes are available; only this method is overridden, so
   * reading from the stream behaves as in {@link ByteArrayInputStream}.
   *
   * @return {@code 0}, unconditionally
   */
  @Override
  public synchronized int available() {
    return 0;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,20 @@ public void testFromInputStream() throws IOException {
validate(data, factory);
}

@Test
public void testFromLargeAvailableAgnosticInputStream() throws IOException {
  // Use a payload larger than
  // java.nio.channels.Channels.ReadableByteChannelImpl.TRANSFER_SIZE (8192 bytes),
  // presumably so that a single channel read cannot return the whole payload.
  final int payloadSize = 9 * 1024;
  byte[] expected = new byte[payloadSize];
  RANDOM.nextBytes(expected);

  // The stream's backing array holds the payload followed by 10 extra random bytes
  // that lie beyond the requested byte count.
  byte[] backing = new byte[expected.length + 10];
  RANDOM.nextBytes(backing);
  System.arraycopy(expected, 0, backing, 0, expected.length);

  Supplier<BytesInput> factory = () -> BytesInput.from(new AvailableAgnosticInputStream(backing), payloadSize);

  validate(expected, factory);
}

@Test
public void testFromByteArrayOutputStream() throws IOException {
byte[] data = new byte[1000];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,11 @@ public ProtoTimeConverter(
this.logicalTypeAnnotation = logicalTypeAnnotation;
}

/**
 * Accepts a value delivered as a 32-bit int, widens it to {@code long} and delegates to
 * {@link #addLong(long)} so both paths share the same conversion logic.
 *
 * @param value the int value to forward
 */
@Override
public void addInt(int value) {
  final long widened = value;
  addLong(widened);
}

@Override
public void addLong(long value) {
LocalTime localTime;
Expand Down
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
<version>32</version>
<version>33</version>
</parent>

<groupId>org.apache.parquet</groupId>
Expand All @@ -27,13 +27,13 @@
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
</license>
</licenses>

<issueManagement>
<system>JIRA</system>
<url>https://issues.apache.org/jira/browse/PARQUET</url>
<system>GitHub</system>
<url>https://github.com/apache/parquet-java/issues</url>
</issueManagement>

<mailingLists>
Expand Down Expand Up @@ -86,7 +86,7 @@
<pig.version>0.16.0</pig.version>
<pig.classifier>h2</pig.classifier>
<thrift-maven-plugin.version>0.10.0</thrift-maven-plugin.version>
<thrift.version>0.20.0</thrift.version>
<thrift.version>0.21.0</thrift.version>
<format.thrift.version>${thrift.version}</format.thrift.version>
<fastutil.version>8.5.13</fastutil.version>
<semver.api.version>0.9.33</semver.api.version>
Expand Down

0 comments on commit 1565d73

Please sign in to comment.