Skip to content

Commit

Permalink
GH-3040: DictionaryFilter.canDrop may return false positive result wh…
Browse files Browse the repository at this point in the history
…en dict size exceeds 8k (#3041)

* GH-3040: DictionaryFilter.canDrop may return false positive result when dict size exceeds 8k

* style

* check bytesRead

* import
  • Loading branch information
pan3793 authored and Fokko committed Nov 6, 2024
1 parent 9584632 commit 47fe3d3
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.util.Arrays;
import java.util.List;
Expand Down Expand Up @@ -376,7 +378,15 @@ void writeInto(ByteBuffer buffer) {
ByteBuffer workBuf = buffer.duplicate();
int pos = buffer.position();
workBuf.limit(pos + byteCount);
Channels.newChannel(in).read(workBuf);
ReadableByteChannel channel = Channels.newChannel(in);
int remaining = byteCount;
while (remaining > 0) {
int bytesRead = channel.read(workBuf);
if (bytesRead < 0) {
throw new EOFException("Reached the end of stream with " + remaining + " bytes left to read");
}
remaining -= bytesRead;
}
buffer.position(pos + byteCount);
} catch (IOException e) {
new RuntimeException("Exception occurred during reading input stream", e);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.parquet.bytes;

import java.io.ByteArrayInputStream;

/**
 * A {@link ByteArrayInputStream} whose {@link #available()} always reports {@code 0}.
 *
 * <p>Only the availability hint is overridden; reading is inherited unchanged from
 * {@link ByteArrayInputStream}. This imitates stream implementations that, in practice,
 * always return 0 from {@code available()} even when they still have more data.
 */
public class AvailableAgnosticInputStream extends ByteArrayInputStream {

  /**
   * Creates a stream that serves the bytes of the given array.
   *
   * @param buf the backing array handed to {@link ByteArrayInputStream}
   */
  public AvailableAgnosticInputStream(byte[] buf) {
    super(buf);
  }

  /**
   * Reports that no bytes are available, regardless of how many remain unread.
   *
   * @return always {@code 0}
   */
  @Override
  public synchronized int available() {
    return 0;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,20 @@ public void testFromInputStream() throws IOException {
validate(data, factory);
}

@Test
public void testFromLargeAvailableAgnosticInputStream() throws IOException {
  // Use a payload larger than
  // java.nio.channels.Channels.ReadableByteChannelImpl.TRANSFER_SIZE = 8192,
  // so the payload cannot fit in a single transfer of that size.
  final int payloadSize = 9 * 1024;
  byte[] expected = new byte[payloadSize];
  RANDOM.nextBytes(expected);

  // The stream holds 10 extra trailing bytes beyond the payload; only the first
  // payloadSize bytes are requested from it below.
  byte[] streamContent = new byte[expected.length + 10];
  RANDOM.nextBytes(streamContent);
  System.arraycopy(expected, 0, streamContent, 0, expected.length);

  Supplier<BytesInput> bytesInputFactory =
      () -> BytesInput.from(new AvailableAgnosticInputStream(streamContent), payloadSize);

  validate(expected, bytesInputFactory);
}

@Test
public void testFromByteArrayOutputStream() throws IOException {
byte[] data = new byte[1000];
Expand Down

0 comments on commit 47fe3d3

Please sign in to comment.