-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[FLINK-38477][runtime] Add FINISHED watermark status to support proper watermark aggregation #27083
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
1839938
d981905
b0d2928
a18f797
dc705b3
7545ca1
943d7e9
6de4c6b
e49e3d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ public class FinishedOnRestoreInput<IN> implements Input<IN> { | |
private final int inputCount; | ||
|
||
private int watermarksSeen = 0; | ||
private int finishedStatusSeen = 0; | ||
|
||
public FinishedOnRestoreInput(RecordWriterOutput<?>[] streamOutputs, int inputCount) { | ||
this.streamOutputs = streamOutputs; | ||
|
@@ -58,7 +59,22 @@ public void processWatermark(Watermark watermark) { | |
|
||
@Override | ||
public void processWatermarkStatus(WatermarkStatus watermarkStatus) throws Exception { | ||
throw new IllegalStateException(); | ||
// Tasks that finished on restore may receive FINISHED watermark status from upstream. | ||
// Aggregate FINISHED status from all inputs and emit once all are received, similar to | ||
// how MAX_WATERMARK is handled. This ensures proper watermark status propagation. | ||
if (watermarkStatus.isFinished()) { | ||
if (++finishedStatusSeen == inputCount) { | ||
for (RecordWriterOutput<?> streamOutput : streamOutputs) { | ||
streamOutput.emitWatermarkStatus(watermarkStatus); | ||
} | ||
} | ||
return; | ||
} | ||
// Other watermark statuses (ACTIVE, IDLE) should not occur for finished tasks | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nits:
|
||
throw new IllegalStateException( | ||
String.format( | ||
"Unexpected watermark status [%s] received for finished on restore task", | ||
watermarkStatus)); | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,6 +106,7 @@ | |
import org.apache.flink.streaming.runtime.tasks.mailbox.PeriodTimer; | ||
import org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailbox; | ||
import org.apache.flink.streaming.runtime.tasks.mailbox.TaskMailboxImpl; | ||
import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; | ||
import org.apache.flink.util.ExceptionUtils; | ||
import org.apache.flink.util.FatalExitExceptionHandler; | ||
import org.apache.flink.util.FlinkException; | ||
|
@@ -697,6 +698,7 @@ protected void processInput(MailboxDefaultAction.Controller controller) throws E | |
protected void endData(StopMode mode) throws Exception { | ||
|
||
if (mode == StopMode.DRAIN) { | ||
emitFinishedStatus(); | ||
advanceToEndOfEventTime(); | ||
} | ||
// finish all operators in a chain effect way | ||
|
@@ -746,6 +748,41 @@ private boolean isCurrentSyncSavepoint(long checkpointId) { | |
*/ | ||
protected void advanceToEndOfEventTime() throws Exception {} | ||
|
||
/** | ||
* Emits FINISHED watermark status to indicate this task has permanently completed and should be | ||
* excluded from watermark aggregation in downstream operators. | ||
* | ||
* <p>This method is overridden by source tasks to emit FINISHED status directly to downstream | ||
* via network outputs. Processing tasks (e.g., OneInputStreamTask, TwoInputStreamTask) do not | ||
* override this method because they rely on StatusWatermarkValve to aggregate FINISHED status | ||
* from upstream inputs and propagate it downstream automatically. | ||
*/ | ||
protected void emitFinishedStatus() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we arrange the class hierarchy so that emitFinishedStatus() is only overridable for sources? |
||
// Empty by default - only source tasks override this method. | ||
} | ||
|
||
/** | ||
* Helper method to emit FINISHED watermark status to all stream outputs. Subclasses can call | ||
* this from their emitFinishedStatus() override if needed. | ||
*/ | ||
protected void emitFinishedStatusToOutputs() { | ||
try { | ||
if (operatorChain != null) { | ||
RecordWriterOutput<?>[] streamOutputs = operatorChain.getStreamOutputs(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we have no stream outputs? if so we could issue another LOG.warn("Cannot emit FINISHED watermark status: operator chain has no stream outputs");) |
||
for (RecordWriterOutput<?> output : streamOutputs) { | ||
output.emitWatermarkStatus(WatermarkStatus.FINISHED); | ||
} | ||
LOG.debug( | ||
"Successfully emitted FINISHED watermark status via {} outputs", | ||
streamOutputs.length); | ||
} else { | ||
LOG.warn("Cannot emit FINISHED watermark status: operator chain is null"); | ||
} | ||
} catch (Exception e) { | ||
LOG.warn("Failed to emit FINISHED watermark status", e); | ||
} | ||
} | ||
|
||
// ------------------------------------------------------------------------ | ||
// Core work methods of the Stream Task | ||
// ------------------------------------------------------------------------ | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am curious on you thinking on how this new status effects https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream-v2/watermark/#emit-watermark customizations.