Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing zero-copy bugs fixes (not for merging) #1156

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
0d2a4dc
RATIS-2164. LeakDetector has a race condition.
szetszwo Sep 28, 2024
205c720
Fix a bug and checkstyle.
szetszwo Sep 28, 2024
58f296f
Enable advanced detection for debugging.
szetszwo Sep 28, 2024
67577ff
Fixed some bugs.
szetszwo Sep 28, 2024
90321af
Some minor changes.
szetszwo Sep 28, 2024
1c5c6eb
try-catch MiniRaftCluster shutdown.
szetszwo Sep 29, 2024
9159532
Report earlier leaks at shutdown.
szetszwo Sep 29, 2024
0f4b61e
Enable advanced leak detection.
szetszwo Sep 29, 2024
fe29cde
Move the enable method to ReferenceCountedLeakDetector.
szetszwo Sep 29, 2024
7a6fef9
Use HashMap.
szetszwo Oct 3, 2024
77db48e
Fix a bug in LogAppenderDefault.
szetszwo Oct 3, 2024
c8e3ac8
Rewrite AdvancedTracing.
szetszwo Oct 4, 2024
aea498f
Fix a bug in LogSegment cache.
szetszwo Oct 4, 2024
0104ece
Add synchronized to get()
szetszwo Oct 4, 2024
43980fb
Fix javac error.
szetszwo Oct 4, 2024
9150bdc
Restore RaftBasicTests.
szetszwo Oct 4, 2024
38f5c69
Move ReferenceCountedLeakDetector.enable(..) to MiniRaftCluster.
szetszwo Oct 4, 2024
23af8ed
Fix bugs in LogSegment.EntryCache.
szetszwo Oct 5, 2024
3512387
Fix a bug in SimpleStateMachine4Testing.
szetszwo Oct 5, 2024
b548373
Copy LogEntryProto in SimpleStateMachine4Testing.
szetszwo Oct 5, 2024
c4ac263
Use Throwable in MiniRaftCluster.
szetszwo Oct 5, 2024
b831226
New entries can be added after EntryCache is closed.
szetszwo Oct 6, 2024
55a3896
Bump test related plugin versions.
szetszwo Oct 7, 2024
1d49431
Reduce messages to 100
szetszwo Oct 7, 2024
b57a748
Fix checkstyle.
szetszwo Oct 7, 2024
483b6ae
Reset test Xmx to 2g
szetszwo Oct 7, 2024
6547e14
Retry assertNoLeaks multiple times.
szetszwo Oct 7, 2024
dc690e4
Copy log entries in MemoryRaftLog.
szetszwo Oct 7, 2024
632809e
SegmentedRaftLogWorker should clean up unfinished tasks in the queue.
szetszwo Oct 7, 2024
3b07ab9
Fix checkstyle
szetszwo Oct 7, 2024
6c15124
Revert pom.xml changes.
szetszwo Oct 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Retry assertNoLeaks multiple times.
szetszwo committed Oct 7, 2024
commit 6547e14d1500cefc8a292765bf520195ae973784
22 changes: 17 additions & 5 deletions ratis-common/src/main/java/org/apache/ratis/util/LeakDetector.java
Original file line number Diff line number Diff line change
@@ -79,9 +79,9 @@ synchronized LeakTracker add(Object referent, ReferenceQueue<Object> queue, Supp
return tracker;
}

synchronized void assertNoLeaks() {
synchronized int getNumLeaks(boolean throwException) {
if (set.isEmpty()) {
return;
return 0;
}

int n = 0;
@@ -90,7 +90,10 @@ synchronized void assertNoLeaks() {
n++;
}
}
assertNoLeaks(n);
if (throwException) {
assertNoLeaks(n);
}
return n;
}

synchronized void assertNoLeaks(int leaks) {
@@ -151,12 +154,21 @@ Runnable track(Object leakable, Supplier<String> reportLeak) {
return allLeaks.add(leakable, queue, reportLeak)::remove;
}

public void assertNoLeaks() {
public void assertNoLeaks(int maxRetries) throws InterruptedException {
synchronized (leakMessages) {
Preconditions.assertTrue(leakMessages.isEmpty(),
() -> "#leaks = " + leakMessages.size() + "\n" + leakMessages);
}
allLeaks.assertNoLeaks();

for(int i = 0; i < maxRetries; i++) {
final int numLeaks = allLeaks.getNumLeaks(false);
if (numLeaks == 0) {
return;
}
LOG.warn("{}/{}) numLeaks == {} > 0, will wait and retry ...", i, maxRetries, numLeaks);
TimeDuration.ONE_SECOND.sleep();
}
allLeaks.getNumLeaks(true);
}

private static final class LeakTracker extends WeakReference<Object> {
Original file line number Diff line number Diff line change
@@ -854,6 +854,7 @@ public void shutdown() {
LOG.info("*** ");
LOG.info("************************************************************** ");
LOG.info(printServers());
final int maxRetries = 30;

// TODO: classes like RaftLog may throw uncaught exception during shutdown (e.g. write after close)
ExitUtils.setTerminateOnUncaughtException(false);
@@ -864,7 +865,19 @@ public void shutdown() {
try {
executor.shutdown();
// just wait for a few seconds
executor.awaitTermination(5, TimeUnit.SECONDS);
boolean terminated = false;

for(int i = 0; i < maxRetries && !terminated; ) {
terminated = executor.awaitTermination(1, TimeUnit.SECONDS);
if (!terminated) {
i++;
if (i < maxRetries) {
LOG.warn("Not yet able to shutdown executor {}/{}, will wait again ...", i, maxRetries);
} else {
LOG.error("Failed to shutdown executor, some servers may be still running:\n{}", printServers());
}
}
}
} catch (InterruptedException e) {
LOG.warn("shutdown interrupted", e);
Thread.currentThread().interrupt();
@@ -878,9 +891,13 @@ public void shutdown() {
try {
RaftTestUtil.gc();
} catch (InterruptedException e) {
LOG.info("gc interrupted.");
LOG.warn("gc interrupted.", e);
}
try {
ReferenceCountedLeakDetector.getLeakDetector().assertNoLeaks(maxRetries);
} catch (InterruptedException e) {
LOG.warn("LeakDetector interrupted.", e);
}
ReferenceCountedLeakDetector.getLeakDetector().assertNoLeaks();
}

/**