Skip to content

Commit

Permalink
TEZ-4526: Avoid calling LocationProvider#getPreferredLocations multiple times while generating grouped splits (#323)
Browse files (browse the repository at this point in the history)
  • Loading branch information
SourabhBadhya authored Jan 9, 2024
1 parent 174d0d1 commit 2161124
Showing 1 changed file with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,12 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,

long totalLength = 0;
Map<String, LocationHolder> distinctLocations = createLocationsMap(conf);
+ Map<SplitContainer, String[]> splitToLocationsMap = new HashMap<>(originalSplits.size());
// go through splits and add them to locations
for (SplitContainer split : originalSplits) {
totalLength += estimator.getEstimatedSize(split);
String[] locations = locationProvider.getPreferredLocations(split);
+ splitToLocationsMap.put(split, locations);
if (locations == null || locations.length == 0) {
locations = emptyLocations;
allSplitsHaveLocalhost = false;
Expand Down Expand Up @@ -293,7 +295,7 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,
groupedSplits = new ArrayList<GroupedSplitContainer>(originalSplits.size());
for (SplitContainer split : originalSplits) {
GroupedSplitContainer newSplit =
- new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(locationProvider.getPreferredLocations(split)),
+ new GroupedSplitContainer(1, wrappedInputFormatName, cleanupLocations(splitToLocationsMap.get(split)),
null);
newSplit.addSplit(split);
groupedSplits.add(newSplit);
Expand All @@ -314,7 +316,7 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,
Set<String> locSet = new HashSet<String>();
for (SplitContainer split : originalSplits) {
locSet.clear();
- String[] locations = locationProvider.getPreferredLocations(split);
+ String[] locations = splitToLocationsMap.get(split);
if (locations == null || locations.length == 0) {
locations = emptyLocations;
}
Expand Down Expand Up @@ -408,7 +410,7 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,
groupLocation = null;
} else if (doingRackLocal) {
for (SplitContainer splitH : group) {
- String[] locations = locationProvider.getPreferredLocations(splitH);
+ String[] locations = splitToLocationsMap.get(splitH);
if (locations != null) {
for (String loc : locations) {
if (loc != null) {
Expand Down Expand Up @@ -503,7 +505,7 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,
}
numRackSplitsToGroup--;
rackSet.clear();
- String[] locations = locationProvider.getPreferredLocations(split);
+ String[] locations = splitToLocationsMap.get(split);
if (locations == null || locations.length == 0) {
locations = emptyLocations;
}
Expand Down

0 comments on commit 2161124

Please sign in to comment.