-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Introduce StateIteratorProcessor in CDK (#33312)
- Loading branch information
1 parent
fa2a2cf
commit 18e0e77
Showing
12 changed files
with
306 additions
and
115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
version=0.10.2 | ||
version=0.10.3 |
74 changes: 74 additions & 0 deletions
74
.../main/java/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import com.google.common.collect.AbstractIterator; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateStats; | ||
import java.time.Instant; | ||
import java.util.Iterator; | ||
import javax.annotation.CheckForNull; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class SourceStateIterator<T> extends AbstractIterator<AirbyteMessage> implements Iterator<AirbyteMessage> { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(SourceStateIterator.class); | ||
private final Iterator<T> messageIterator; | ||
private boolean hasEmittedFinalState = false; | ||
private long recordCount = 0L; | ||
private Instant lastCheckpoint = Instant.now(); | ||
|
||
private final SourceStateIteratorManager sourceStateIteratorManager; | ||
|
||
public SourceStateIterator(final Iterator<T> messageIterator, | ||
final SourceStateIteratorManager sourceStateIteratorManager) { | ||
this.messageIterator = messageIterator; | ||
this.sourceStateIteratorManager = sourceStateIteratorManager; | ||
} | ||
|
||
@CheckForNull | ||
@Override | ||
protected AirbyteMessage computeNext() { | ||
boolean iteratorHasNextValue = false; | ||
try { | ||
iteratorHasNextValue = messageIterator.hasNext(); | ||
} catch (Exception ex) { | ||
LOGGER.info("Caught exception while trying to get the next from message iterator. Treating hasNext to false. ", ex); | ||
} | ||
if (iteratorHasNextValue) { | ||
if (sourceStateIteratorManager.shouldEmitStateMessage(recordCount, lastCheckpoint)) { | ||
AirbyteStateMessage stateMessage = sourceStateIteratorManager.generateStateMessageAtCheckpoint(); | ||
stateMessage.withSourceStats(new AirbyteStateStats().withRecordCount((double) recordCount)); | ||
|
||
recordCount = 0L; | ||
lastCheckpoint = Instant.now(); | ||
return new AirbyteMessage() | ||
.withType(Type.STATE) | ||
.withState(stateMessage); | ||
} | ||
// Use try-catch to catch Exception that could occur when connection to the database fails | ||
try { | ||
final T message = messageIterator.next(); | ||
final AirbyteMessage processedMessage = sourceStateIteratorManager.processRecordMessage(message); | ||
recordCount++; | ||
return processedMessage; | ||
} catch (final Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
} else if (!hasEmittedFinalState) { | ||
hasEmittedFinalState = true; | ||
final AirbyteStateMessage finalStateMessage = sourceStateIteratorManager.createFinalStateMessage(); | ||
return new AirbyteMessage() | ||
.withType(Type.STATE) | ||
.withState(finalStateMessage); | ||
} else { | ||
return endOfData(); | ||
} | ||
} | ||
|
||
} |
37 changes: 37 additions & 0 deletions
37
...ava/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIteratorManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import java.time.Instant; | ||
|
||
public interface SourceStateIteratorManager<T> { | ||
|
||
/** | ||
* Returns a state message that should be emitted at checkpoint. | ||
*/ | ||
AirbyteStateMessage generateStateMessageAtCheckpoint(); | ||
|
||
/** | ||
* For the incoming record message, this method defines how the connector will consume it. | ||
*/ | ||
AirbyteMessage processRecordMessage(final T message); | ||
|
||
/** | ||
* At the end of the iteration, this method will be called and it will generate the final state | ||
* message. | ||
* | ||
* @return | ||
*/ | ||
AirbyteStateMessage createFinalStateMessage(); | ||
|
||
/** | ||
* Determines if the iterator has reached checkpoint or not, based on the time and number of record | ||
* messages it has been processed since the last checkpoint. | ||
*/ | ||
boolean shouldEmitStateMessage(final long recordCount, final Instant lastCheckpoint); | ||
|
||
} |
92 changes: 92 additions & 0 deletions
92
...t/java/io/airbyte/cdk/integrations/source/relationaldb/state/SourceStateIteratorTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/* | ||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.integrations.source.relationaldb.state; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.ArgumentMatchers.anyLong; | ||
import static org.mockito.ArgumentMatchers.eq; | ||
import static org.mockito.Mockito.atLeastOnce; | ||
import static org.mockito.Mockito.doReturn; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.verify; | ||
|
||
import io.airbyte.protocol.models.v0.AirbyteMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type; | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateMessage; | ||
import io.airbyte.protocol.models.v0.AirbyteStateStats; | ||
import java.util.Iterator; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class SourceStateIteratorTest { | ||
|
||
SourceStateIteratorManager mockProcessor; | ||
Iterator<AirbyteMessage> messageIterator; | ||
|
||
SourceStateIterator sourceStateIterator; | ||
|
||
@BeforeEach | ||
void setup() { | ||
mockProcessor = mock(SourceStateIteratorManager.class); | ||
messageIterator = mock(Iterator.class); | ||
sourceStateIterator = new SourceStateIterator(messageIterator, mockProcessor); | ||
} | ||
|
||
// Provides a way to generate a record message and will verify corresponding spied functions have | ||
// been called. | ||
void processRecordMessage() { | ||
doReturn(true).when(messageIterator).hasNext(); | ||
doReturn(false).when(mockProcessor).shouldEmitStateMessage(anyLong(), any()); | ||
AirbyteMessage message = new AirbyteMessage().withType(Type.RECORD).withRecord(new AirbyteRecordMessage()); | ||
doReturn(message).when(mockProcessor).processRecordMessage(any()); | ||
doReturn(message).when(messageIterator).next(); | ||
|
||
assertEquals(message, sourceStateIterator.computeNext()); | ||
verify(mockProcessor, atLeastOnce()).processRecordMessage(message); | ||
verify(mockProcessor, atLeastOnce()).shouldEmitStateMessage(eq(0L), any()); | ||
} | ||
|
||
@Test | ||
void testShouldProcessRecordMessage() { | ||
processRecordMessage(); | ||
} | ||
|
||
@Test | ||
void testShouldEmitStateMessage() { | ||
processRecordMessage(); | ||
doReturn(true).when(mockProcessor).shouldEmitStateMessage(anyLong(), any()); | ||
final AirbyteStateMessage stateMessage = new AirbyteStateMessage(); | ||
doReturn(stateMessage).when(mockProcessor).generateStateMessageAtCheckpoint(); | ||
AirbyteMessage expectedMessage = new AirbyteMessage().withType(Type.STATE).withState(stateMessage); | ||
expectedMessage.getState().withSourceStats(new AirbyteStateStats().withRecordCount(1.0)); | ||
assertEquals(expectedMessage, sourceStateIterator.computeNext()); | ||
} | ||
|
||
@Test | ||
void testShouldEmitFinalStateMessage() { | ||
processRecordMessage(); | ||
processRecordMessage(); | ||
doReturn(false).when(messageIterator).hasNext(); | ||
final AirbyteStateMessage stateMessage = new AirbyteStateMessage(); | ||
doReturn(stateMessage).when(mockProcessor).createFinalStateMessage(); | ||
AirbyteMessage expectedMessage = new AirbyteMessage().withType(Type.STATE).withState(stateMessage); | ||
expectedMessage.getState().withSourceStats(new AirbyteStateStats().withRecordCount(2.0)); | ||
assertEquals(expectedMessage, sourceStateIterator.computeNext()); | ||
} | ||
|
||
@Test | ||
void testShouldSendEndOfData() { | ||
processRecordMessage(); | ||
doReturn(false).when(messageIterator).hasNext(); | ||
doReturn(new AirbyteStateMessage()).when(mockProcessor).createFinalStateMessage(); | ||
sourceStateIterator.computeNext(); | ||
|
||
// After sending the final state, if iterator was called again, we will return null. | ||
assertEquals(null, sourceStateIterator.computeNext()); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 0 additions & 104 deletions
104
.../java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialSyncStateIterator.java
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.