Skip to content

Commit

Permalink
[Fix](multi-catalog) Filter invisible files for hive table. (apache#…
Browse files Browse the repository at this point in the history
…21867)

In fact, Hive cannot read files whose names start with "." or "_", so we need to filter out these files.
  • Loading branch information
dutyu authored Jul 18, 2023
1 parent 417e3e5 commit 50b81a9
Showing 1 changed file with 21 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
import org.apache.doris.planner.ListPartitionPrunerV2;
import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
import org.apache.doris.planner.external.FileSplit;
import org.apache.doris.spi.Split;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
Expand All @@ -66,6 +65,7 @@
import com.google.common.collect.Streams;
import com.google.common.collect.TreeRangeMap;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
Expand Down Expand Up @@ -957,7 +957,7 @@ public static class FileCacheValue {
// File cache for the self splitter.
private final List<HiveFileStatus> files = Lists.newArrayList();
// File split cache for the old splitter. This is a temporary variable.
// Declared once, as FileSplit, because addSplit reads each split's path.
private final List<FileSplit> splits = Lists.newArrayList();
private boolean isSplittable;
// The values of partitions.
// e.g for file : hdfs://path/to/table/part1=a/part2=b/datafile
Expand All @@ -967,17 +967,21 @@ public static class FileCacheValue {
private AcidInfo acidInfo;

/**
 * Records a remote file in {@code files} unless the file is invisible.
 *
 * <p>A file whose name is rejected by {@code isFileVisible} is skipped entirely;
 * every accepted file is added exactly once.
 *
 * @param file the remote file to record
 */
public void addFile(RemoteFile file) {
    // Check visibility before building the status so hidden files never reach the cache.
    if (!isFileVisible(file.getName())) {
        return;
    }
    HiveFileStatus status = new HiveFileStatus();
    status.setBlockLocations(file.getBlockLocations());
    status.setPath(file.getPath());
    status.length = file.getSize();
    status.blockSize = file.getBlockSize();
    status.modificationTime = file.getModificationTime();
    files.add(status);
}

public void addSplit(Split split) {
splits.add(split);
public void addSplit(FileSplit split) {
if (isFileVisible(split.getPath().getName())) {
splits.add(split);
}
}

public int getValuesSize() {
Expand All @@ -992,6 +996,12 @@ public AcidInfo getAcidInfo() {
/**
 * Replaces the ACID info held by this cache value.
 *
 * @param acidInfo the value to store in the {@code acidInfo} field
 */
public void setAcidInfo(AcidInfo acidInfo) {
this.acidInfo = acidInfo;
}

/**
 * Decides whether a file name denotes a visible file.
 *
 * @param filename the file name to check (the name only, not a full path)
 * @return {@code false} when the name is null or empty, or when it begins
 *         with "." or "_"; {@code true} otherwise
 */
private boolean isFileVisible(String filename) {
    if (StringUtils.isEmpty(filename)) {
        return false;
    }
    // The name is non-empty here, so its first character can be inspected directly.
    char leading = filename.charAt(0);
    return leading != '.' && leading != '_';
}
}

@Data
Expand Down

0 comments on commit 50b81a9

Please sign in to comment.