Skip to content

Commit

Permalink
Bug: Snowflake Cortex destination - Chunks get overwritten by the las…
Browse files Browse the repository at this point in the history
…t chunk (#38327)
  • Loading branch information
bindipankhudi authored May 18, 2024
1 parent b7de9f1 commit d793c1d
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 150 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,15 @@ def _get_airbyte_messsages_from_chunks(
"""Creates Airbyte messages from chunk records."""
airbyte_messages = []
for i, chunk in enumerate(document_chunks):
chunk = document_chunks[i]
message = AirbyteMessage(type=Type.RECORD, record=chunk.record)
new_data = {}
new_data[DOCUMENT_ID_COLUMN] = self._create_document_id(message)
new_data[CHUNK_ID_COLUMN] = str(uuid.uuid4().int)
new_data[METADATA_COLUMN] = chunk.metadata
new_data[DOCUMENT_CONTENT_COLUMN] = chunk.page_content
new_data[EMBEDDING_COLUMN] = chunk.embedding
record_copy = copy.deepcopy(chunk.record)
message = AirbyteMessage(type=Type.RECORD, record=record_copy)
new_data = {
DOCUMENT_ID_COLUMN: self._create_document_id(chunk),
CHUNK_ID_COLUMN: str(uuid.uuid4().int),
METADATA_COLUMN: chunk.metadata,
DOCUMENT_CONTENT_COLUMN: chunk.page_content,
EMBEDDING_COLUMN: chunk.embedding,
}
message.record.data = new_data
airbyte_messages.append(message)
return airbyte_messages
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ def test_write(self):
list(destination.write(self.config, append_dedup_catalog, [self._record("mystream", "Cats are nice too", 4), first_state_message]))
assert(self._get_record_count("mystream") == 6)

# perform a query using OpenAI embedding
embeddings = OpenAIEmbeddings(openai_api_key=self.config["embedding"]["openai_key"])
result = self._run_cosine_similarity(embeddings.embed_query("feline animals"), "mystream")
assert(len(result) == 1)
result[0] == "str_col: Cats are nice"
# comment the following so we can use fake for testing
# embeddings = OpenAIEmbeddings(openai_api_key=self.config["embedding"]["openai_key"])
# result = self._run_cosine_similarity(embeddings.embed_query("feline animals"), "mystream")
# assert(len(result) == 1)
# result[0] == "str_col: Cats are nice"


def test_overwrite_mode_deletes_records(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ data:
connectorSubtype: vectorstore
connectorType: destination
definitionId: d9e5418d-f0f4-4d19-a8b1-5630543638e2
dockerImageTag: 0.1.1
dockerImageTag: 0.1.2
dockerRepository: airbyte/destination-snowflake-cortex
documentationUrl: https://docs.airbyte.com/integrations/destinations/snowflake-cortex
githubIssueLabel: destination-snowflake-cortex
Expand Down
Loading

0 comments on commit d793c1d

Please sign in to comment.