apache · mikemccand · Jun 24, 2021 · Apr 29, 2021 · Apr 29, 2021 · May 11, 2021
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
@@ -53,7 +53,7 @@ log.queries=true
 
     { "Populate"
         CreateIndex
-        [{ "MAddDocs" AddDoc } : 5000] : 4
+        [{ {{"MAddDocs" AddDoc } : 5000} FlushIndex } ] : 8
         ForceMerge(1)
         CloseIndex
     }

diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
@@ -50,8 +50,9 @@ private static final class DateFormatInfo {
   private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<>();
   private Path dataDir = null;
   private ArrayList<Path> inputFiles = new ArrayList<>();
-  private int nextFile = 0;
   private int iteration = 0;
+  private int[] threadIndex;
+  private volatile boolean threadIndexCreated;
 
   @Override
   public void setConfig(Config config) {
@@ -102,19 +103,33 @@ public void close() throws IOException {
   public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
     Path f = null;
     String name = null;
-    synchronized (this) {
-      if (nextFile >= inputFiles.size()) {
-        // exhausted files, start a new round, unless forever set to false.
-        if (!forever) {
-          throw new NoMoreDataException();
-        }
-        nextFile = 0;
-        iteration++;
-      }
-      f = inputFiles.get(nextFile++);
-      name = f.toRealPath() + "_" + iteration;
+    int inputFilesSize = inputFiles.size();
+
+    if (threadIndexCreated == false) {
+      createThreadIndex();
+    }
+
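+    // Each thread owns one slot in threadIndex; the slot number is parsed from the thread
+    // name after its first 12 characters (the "IndexThread-" prefix assigned by TaskSequence).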
+    int index = Integer.parseInt(Thread.currentThread().getName().substring(12));
+    int fIndex = index + threadIndex[index] * threadIndex.length;
+    threadIndex[index]++;
+
+    // Sanity check, if # threads is greater than # input files, wrap index
+    if (index >= inputFilesSize) {
+      index %= inputFilesSize;
     }
 
+    // Check if this thread has exhausted its files
+    if (fIndex >= inputFilesSize) {
+      threadIndex[index] = 0;
+      fIndex = index + threadIndex[index] * threadIndex.length;
+      threadIndex[index]++;
+      iteration++;
+    }
+
+    f = inputFiles.get(fIndex);
+    name = f.toRealPath() + "_" + iteration;
+
     try (BufferedReader reader = Files.newBufferedReader(f, StandardCharsets.UTF_8)) {
       // First line is the date, 3rd is the title, rest is body
       String dateStr = reader.readLine();
@@ -143,7 +158,13 @@ public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOExc
   @Override
   public synchronized void resetInputs() throws IOException {
     super.resetInputs();
-    nextFile = 0;
     iteration = 0;
   }
+
+  /**
+   * Allocates {@code threadIndex} on first use, with one counter per thread as reported by
+   * {@code getConfig().getNumThreads()}. The flag is re-checked while holding this object's
+   * monitor, so a caller that arrives after the array exists returns without replacing it.
+   */
+  private synchronized void createThreadIndex() {
+    if (threadIndexCreated) {
+      return;
+    }
+    // Assign the array before raising the volatile flag that getNextDocData() tests.
+    final int slots = getConfig().getNumThreads();
+    threadIndex = new int[slots];
+    threadIndexCreated = true;
+  }
 }
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/FlushIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/FlushIndexTask.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.index.IndexWriter;
+
+/**
+ * Benchmark task that calls {@link IndexWriter#flushNextBuffer()} on the run's index writer, if
+ * one is currently set on the {@link PerfRunData}.
+ */
+public class FlushIndexTask extends PerfTask {
+
+  /**
+   * Creates the task.
+   *
+   * @param runData run data handed to the {@link PerfTask} constructor
+   */
+  public FlushIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  /**
+   * Flushes the writer's next buffer when a writer is available; otherwise does nothing.
+   *
+   * @return always 1, whether or not a writer was present
+   */
+  @Override
+  public int doLogic() throws Exception {
+    final IndexWriter writer = getRunData().getIndexWriter();
+    if (writer == null) {
+      // No writer on the run data: nothing to flush.
+      return 1;
+    }
+    writer.flushNextBuffer();
+    return 1;
+  }
+}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
@@ -340,12 +340,16 @@ private int doParallelTasks() throws Exception {
 
     initTasksArray();
     ParallelTask t[] = runningParallelTasks = new ParallelTask[repetitions * tasks.size()];
+    this.getRunData().getConfig().setNumThreads(t.length);
     // prepare threads
     int index = 0;
     for (int k = 0; k < repetitions; k++) {
       for (int i = 0; i < tasksArray.length; i++) {
         final PerfTask task = tasksArray[i].clone();
-        t[index++] = new ParallelTask(task);
+        t[index] = new ParallelTask(task);
+        // Name each thread "IndexThread-<index>": ReutersContentSource parses the number after
+        // this prefix to decide which input files the thread reads.
+        t[index].setName("IndexThread-" + index);
+        index++;
       }
     }
     // run threads

diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
@@ -54,6 +54,7 @@ public class Config {
   private HashMap<String, Object> valByRound = new HashMap<>();
   private HashMap<String, String> colForValByRound = new HashMap<>();
   private String algorithmText;
+  private int numThreads = 1;
 
   /**
    * Read both algorithm and config properties.
@@ -113,6 +114,14 @@ public Config(Properties props) {
     }
   }
 
+  /**
+   * Records the number of threads used by the current run.
+   *
+   * @param numThreads thread count to store; the value is stored as given, without validation
+   */
+  public void setNumThreads(final int numThreads) {
+    this.numThreads = numThreads;
+  }
+
+  /**
+   * Returns the thread count last stored with {@link #setNumThreads(int)}.
+   *
+   * @return the stored thread count
+   */
+  public int getNumThreads() {
+    return this.numThreads;
+  }
+
   @SuppressWarnings({"unchecked", "rawtypes"})
   private void printProps() {
     System.out.println("------------> config properties:");