Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the random read behavior in StressWorkerBench #18000

Merged
merged 23 commits into from
Sep 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,14 @@ public final class WorkerBenchParameters extends FileSystemParameters {
public Integer mRandomSeed = 1;

@Parameter(names = {"--random-max-length"},
description = "The random max length upper bound")
description = "The random max length upper bound."
+ "As this InputStream.read() only accept read offset and"
+ " length as Integer, so this max length must smaller than 2.1GB.")
public String mRandomMaxReadLength = "4m";

@Parameter(names = {"--random-min-length"},
description = "The random max length upper bound")
description = "The random max length lower bound."
+ "this random min length must not larger than random max length.")
public String mRandomMinReadLength = "1m";

@Parameter(names = {"--free"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

/**
Expand All @@ -67,13 +67,17 @@ public class StressWorkerBench extends AbstractStressBench<WorkerBenchTaskResult

private FileSystem[] mCachedFs;
private Path[] mFilePaths;
private Integer[] mOffsets;
private Integer[] mLengths;
private FileSystemContext mFsContext;

/** generate random number in range [min, max] (include both min and max).*/
private Integer randomNumInRange(Random rand, int min, int max) {
return rand.nextInt(max - min + 1) + min;
/**
* generate random number in range [min, max] (include both min and max).
*/
private long randomNumInRange(long min, long max) {
return ThreadLocalRandom.current().nextLong(min, max + 1) + min;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note we are using thread local random here, so does the --seed parameter still take effect?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we no longer need this param

}

/**
 * Returns the smaller of two long values.
 *
 * @param a the first value
 * @param b the second value
 * @return the smaller of a and b; if they are equal, that same value
 */
private long minLong(long a, long b) {
  // The previous ternary (a > b ? a : b) returned the larger value, i.e. the maximum.
  return Math.min(a, b);
}

/**
Expand Down Expand Up @@ -153,8 +157,6 @@ public void prepare() throws Exception {
// and offsets
mFilePaths = new Path[numFiles];
// set random offsets and lengths if enabled
mLengths = new Integer[numFiles];
mOffsets = new Integer[numFiles];

generateTestFilePaths(basePath);

Expand Down Expand Up @@ -204,16 +206,10 @@ public void prepare() throws Exception {
* @param basePath base dir where the files should be prepared
*/
public void generateTestFilePaths(Path basePath) throws IOException {
int fileSize = (int) FormatUtils.parseSpaceSize(mParameters.mFileSize);
int clusterSize = mBaseParameters.mClusterLimit;
int threads = mParameters.mThreads;
List<BlockWorkerInfo> workers = mFsContext.getCachedWorkers();

Random rand = new Random();
if (mParameters.mIsRandom) {
rand = new Random(mParameters.mRandomSeed);
}

for (int i = 0; i < clusterSize; i++) {
BlockWorkerInfo localWorker = workers.get(i);
LOG.info("Building file paths for worker {}", localWorker);
Expand All @@ -222,19 +218,6 @@ public void generateTestFilePaths(Path basePath) throws IOException {

int index = i * threads + j;
mFilePaths[index] = filePath;

// Continue init other aspects of the file read operation
// TODO(jiacheng): do we want a new randomness for every read?
if (mParameters.mIsRandom) {
int randomMin = (int) FormatUtils.parseSpaceSize(mParameters.mRandomMinReadLength);
int randomMax = (int) FormatUtils.parseSpaceSize(mParameters.mRandomMaxReadLength);
mOffsets[index] = randomNumInRange(rand, 0, fileSize - 1 - randomMin);
mLengths[index] = randomNumInRange(rand, randomMin,
voddle marked this conversation as resolved.
Show resolved Hide resolved
Integer.min(fileSize - mOffsets[i], randomMax));
} else {
mOffsets[index] = 0;
mLengths[index] = fileSize;
}
}
}
LOG.info("{} file paths generated", mFilePaths.length);
Expand Down Expand Up @@ -356,6 +339,16 @@ public void validateParams() throws Exception {
throw new IllegalStateException(String.format("%s cannot be %s when %s option provided",
FileSystemParameters.WRITE_TYPE_OPTION_NAME, WritePType.MUST_CACHE, "--free"));
}

if (FormatUtils.parseSpaceSize(mParameters.mRandomMaxReadLength) > Integer.MAX_VALUE) {
throw new IllegalArgumentException("mRandomReadMaxLength cannot be larger than 2.1G");
}

if (FormatUtils.parseSpaceSize(mParameters.mRandomMaxReadLength)
< FormatUtils.parseSpaceSize(mParameters.mRandomMinReadLength)) {
throw new IllegalArgumentException("mRandomReadMinLength must not larger"
+ " than mRandomReadMaxLength");
}
}

private static final class BenchContext {
Expand Down Expand Up @@ -402,6 +395,9 @@ private final class BenchThread implements Callable<Void> {
private final byte[] mBuffer;
private final WorkerBenchTaskResult mResult;
private final boolean mIsRandomRead;
private final long mRandomMax;
private final long mRandomMin;
Comment on lines +398 to +399
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

length should be int.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since these two values will be evaluated against the long-typed mFileSize later, I think long would be better.

private final long mFileSize;

private FSDataInputStream mInStream;

Expand All @@ -415,6 +411,9 @@ private BenchThread(BenchContext context, int targetFileIndex, FileSystem fs) {
mResult.setParameters(mParameters);
mResult.setBaseParameters(mBaseParameters);
mIsRandomRead = mParameters.mIsRandom;
mRandomMin = FormatUtils.parseSpaceSize(mParameters.mRandomMinReadLength);
mRandomMax = FormatUtils.parseSpaceSize(mParameters.mRandomMaxReadLength);
mFileSize = FormatUtils.parseSpaceSize(mParameters.mFileSize);
}

@Override
Expand Down Expand Up @@ -480,8 +479,6 @@ private void runInternal() throws Exception {
*/
private WorkerBenchDataPoint applyOperation() throws IOException {
Path filePath = mFilePaths[mTargetFileIndex];
int offset = mOffsets[mTargetFileIndex];
int length = mLengths[mTargetFileIndex];

long startOperation = CommonUtils.getCurrentMs();
if (mInStream == null) {
Expand All @@ -490,9 +487,12 @@ private WorkerBenchDataPoint applyOperation() throws IOException {

int bytesRead = 0;
if (mIsRandomRead) {
long offset = randomNumInRange(0, mFileSize - 1 - mRandomMin);
long lengthMax = Math.min(mFileSize - offset, mRandomMax);
long length = randomNumInRange(mRandomMin, lengthMax);
while (length > 0) {
int actualReadLength = mInStream
.read(offset, mBuffer, 0, mBuffer.length);
.read(offset, mBuffer, 0, (int) minLong(mBuffer.length, length));
if (actualReadLength < 0) {
closeInStream();
break;
Expand Down
Loading