🏥 Source Notion: update stream schema #35409

Merged (9 commits, Feb 22, 2024)
@@ -10,7 +10,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 6e00b415-b02e-4160-bf02-58176a0ae687
-dockerImageTag: 2.0.9
+dockerImageTag: 2.1.0
dockerRepository: airbyte/source-notion
documentationUrl: https://docs.airbyte.com/integrations/sources/notion
githubIssueLabel: source-notion
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
version = "2.0.9"
version = "2.1.0"
name = "source-notion"
description = "Source implementation for Notion."
authors = [ "Airbyte <[email protected]>",]
@@ -32,6 +32,17 @@
"properties": {
"type": {
"type": ["null", "string"]
},
"info": {
"type": ["null", "object"],
"properties": {
"id": {
"type": ["null", "string"]
},
"object": {
"type": ["null", "string"]
}
}
}
}
},
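Note on the hunk above: the Notion API keys a rich-text mention's detail payload by the mention type (a `page` mention carries a `page` object, a `user` mention a `user` object, and so on), which a static JSON schema cannot enumerate, so the schema gains a fixed `info` object instead. A minimal sketch of the shape change, using placeholder values rather than data from the PR:

```python
# Illustrative only -- approximate shape of a mention before and after the new
# normalization; the id below is a placeholder, not a value from the PR.
raw_mention = {"type": "page", "page": {"id": "example-page-id"}}

# The type-named key ("page") is folded into a static "info" key that the
# updated schema can describe.
normalized_mention = {"type": "page", "info": {"id": "example-page-id"}}
```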
@@ -37,10 +37,38 @@
"type": {
"type": "string"
},
"info": {
"avatar_url": {
"type": ["null", "string"]
},
"id": {
"type": ["null", "string"]
},
"name": {
"type": ["null", "string"]
},
"object": {
"type": ["null", "string"]
},
"person": {
"type": ["null", "object"],
"properties": {
"email": {
"type": ["null", "string"]
},
"type": {
"type": ["null", "string"]
}
}
}
},
"workspace": {
"type": ["null", "boolean"]
}
}
},
"workspace_name": {
"type": ["null", "string"]
}
}
}
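The Users schema above follows the same pattern: a bot's owner details arrive under a key named after the owner type, and the new schema describes them under a fixed `info` key, with `workspace_name` exposed alongside the owner. A rough before/after sketch with placeholder values (not taken from the PR):

```python
# Illustrative only -- approximate Users record shape before and after normalization.
before = {
    "type": "bot",
    "bot": {
        "owner": {"type": "user", "user": {"object": "user", "id": "example-user-id"}},
        "workspace_name": "example workspace",
    },
}

after = {
    "type": "bot",
    "bot": {
        "owner": {"type": "user", "info": {"object": "user", "id": "example-user-id"}},
        "workspace_name": "example workspace",
    },
}
```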
@@ -252,6 +252,22 @@ def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) ->
params["start_cursor"] = next_page_token["next_cursor"]
return params

def transform(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
owner = record.get("bot", {}).get("owner")
if owner:
owner_type = owner.get("type")
owner_info = owner.get(owner_type)
if owner_type and owner_info:
record["bot"]["owner"]["info"] = owner_info
del record["bot"]["owner"][owner_type]
return record

def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
# sometimes notion api returns response without results object
data = response.json().get("results", [])
for record in data:
yield self.transform(record)


class Databases(IncrementalNotionStream):
"""
@@ -313,6 +329,20 @@ def stream_slices(

yield {"page_id": page_id}

def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
transform_object_field = record.get("type")

if transform_object_field:
rich_text = record.get(transform_object_field, {}).get("rich_text", [])
for r in rich_text:
mention = r.get("mention")
if mention:
type_info = mention[mention["type"]]
record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"]["info"] = type_info
del record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"][mention["type"]]

return record

def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
# pages and databases blocks are already fetched in their streams, so no
# need to do it again
@@ -321,7 +351,7 @@ def parse_response(self, response: requests.Response, stream_state: Mapping[str,
        records = super().parse_response(response, stream_state=stream_state, **kwargs)
        for record in records:
            if record["type"] not in ("child_page", "child_database", "ai_block"):
-                yield record
+                yield self.transform(record)

    def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]:
        # if reached recursive limit, don't read anymore
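Both new `transform` hooks apply the same normalization: the detail payload that Notion keys by its type (the owner type in `Users`, the mention type in `Blocks`) is moved under a fixed `info` key so the stream schemas can describe it statically. As a design note, the `Blocks` version re-locates each element with `rich_text.index(r)`; an equivalent sketch (not part of this PR) could mutate the element in place, which is simpler and avoids the repeated linear lookups:

```python
from typing import Any, MutableMapping

def transform_block(record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    # Hypothetical simplification, not the PR's code: rename each mention's
    # type-named key (e.g. "page", "user", "database") to a static "info" key.
    block_type = record.get("type")
    if block_type:
        for element in record.get(block_type, {}).get("rich_text", []):
            mention = element.get("mention")
            if mention:
                mention["info"] = mention.pop(mention["type"])
    return record
```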
@@ -312,3 +312,41 @@ def test_request_throttle(initial_page_size, expected_page_size, mock_response,
    stream.should_retry(response=response)

    assert stream.page_size == expected_page_size


def test_users_record_transformer():
    stream = Users(config=MagicMock())
    response_record = {
        "object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
        "bot": {"owner": {"type": "user", "user": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
        "person": {"email": "email"}}}, "workspace_name": "test"}
    }
    expected_record = {
        "object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
        "bot": {"owner": {"type": "user", "info": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
        "person": {"email": "email"}}}, "workspace_name": "test"}
    }
    assert stream.transform(response_record) == expected_record


def test_block_record_transformer():
    stream = Blocks(parent=None, config=MagicMock())
    response_record = {
        "object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
        "created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
        "paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
        {"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
        {"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
        {"type": "mention", "mention": {"type": "page", "page": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"},
        "plain_text": "test", "href": "https://www.notion.so/id"}], "color": "default"}
    }
    expected_record = {
        "object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
        "created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
        "paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations":{"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text":"test", "href": None},
        {"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
        {"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
        {"type": "mention", "mention": {"type": "page", "info": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": "https://www.notion.so/id"}],
        "color": "default"}
    }
    assert stream.transform(response_record) == expected_record
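One case the new tests do not cover is a record without the dynamic key at all; from the `Users.transform` implementation above, such a record should pass through unchanged. A hypothetical extra check along those lines (not part of this PR) might look like:

```python
def test_users_record_transformer_without_bot_owner():
    # Hypothetical addition, not in the PR: a plain person record has no "bot"
    # key, so Users.transform should return it untouched.
    stream = Users(config=MagicMock())
    record = {"object": "user", "id": "id", "type": "person", "person": {"email": "email"}}
    assert stream.transform(record) == record
```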