Skip to content

Commit

Permalink
Massive notion data revamp (#2140)
Browse files Browse the repository at this point in the history
* Block ordering
* Removing extra metadata
  • Loading branch information
f213 authored Dec 10, 2023
1 parent b7a67a3 commit e91ff69
Show file tree
Hide file tree
Showing 16 changed files with 365 additions and 133 deletions.
3 changes: 3 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ ignore_missing_imports = on

[mypy-dj_rest_auth.*]
ignore_missing_imports = on

[mypy-benedict.*]
ignore_missing_imports = on
73 changes: 68 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ simplejson = "^3.19.1"
stripe = "^6.5.0"
whitenoise = "^6.5.0"
pillow = "^10.1.0"
python-benedict = "^0.33.0"

[tool.poetry.group.dev.dependencies]
autoflake = "^2.2.1"
Expand Down
27 changes: 24 additions & 3 deletions src/apps/notion/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dataclasses import dataclass
from typing import Generator

from apps.notion.rewrite import rewrite
from apps.notion.rewrite import apply_our_adjustments
from apps.notion.types import BlockData
from apps.notion.types import BlockId

Expand All @@ -21,7 +21,7 @@ def from_json(cls, data: dict) -> "NotionBlock":
return cls(id=data["id"], data=data["data"])

def get_data(self) -> BlockData:
return rewrite(self.data)
return apply_our_adjustments(self.data)

@property
def content(self) -> list[BlockId]:
Expand All @@ -42,6 +42,17 @@ def from_json(cls, data: dict) -> "NotionBlockList":
blocks = [NotionBlock.from_json(block_dict) for block_dict in data]
return cls(blocks)

def ordered(self) -> "NotionBlockList":
"""Blockes in order from the first_page_block"""
if self.first_page_block is None:
return self

result = self.__class__([self.first_page_block])
for block_id in self.first_page_block.content:
result.append(self.get_block(block_id))

return result

@classmethod
def from_api_response(cls, api_response: dict[str, BlockData]) -> "NotionBlockList":
instance = cls()
Expand All @@ -60,8 +71,18 @@ def get_underlying_block_ids(self) -> set[BlockId]:

return block_ids

def get_block(self, block_id: BlockId) -> NotionBlock:
for block in self.data:
if block.id == block_id:
return block

raise KeyError("Block with id %s not found", block_id)

def have_block_with_id(self, block_id: BlockId) -> bool:
return len([block for block in self.data if block.id == block_id]) > 0
try:
return self.get_block(block_id) is not None
except KeyError:
return False

def blocks_with_underliying_blocks(self) -> Generator[NotionBlock, None, None]:
"""List of non-page blocks that have other blocks in it"""
Expand Down
2 changes: 1 addition & 1 deletion src/apps/notion/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class NotionPage:
blocks: NotionBlockList

def to_json(self) -> dict:
return {"blocks": [block.to_json() for block in self.blocks]}
return {"blocks": [block.to_json() for block in self.blocks.ordered()]}

@classmethod
def from_json(cls, data: dict) -> "NotionPage":
Expand Down
16 changes: 16 additions & 0 deletions src/apps/notion/rewrite/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from apps.notion.rewrite.links import rewrite_links
from apps.notion.rewrite.tags import drop_extra_tags
from apps.notion.types import BlockData


def apply_our_adjustments(notion_block_data: BlockData) -> BlockData:
adjusted = rewrite_links(notion_block_data)

return drop_extra_tags(adjusted)


__all__ = [
"apply_our_adjustments",
"drop_extra_tags",
"rewrite_links",
]
23 changes: 13 additions & 10 deletions src/apps/notion/rewrite.py → src/apps/notion/rewrite/links.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@
from apps.notion.types import TextProperty


@lru_cache
def get_rewrite_mapping() -> Mapping[BlockId, BlockId]:
"""Returns a mapping Notion Material -> our slug"""
mapping = Material.objects.all().values_list("page_id", "slug")

return {page_id: uuid_to_id(str(slug)) for page_id, slug in mapping}


def rewrite(notion_block_data: BlockData) -> BlockData:
def rewrite_links(notion_block_data: BlockData) -> BlockData:
if "properties" not in notion_block_data.get("value", {}):
return notion_block_data

Expand All @@ -28,12 +20,20 @@ def rewrite(notion_block_data: BlockData) -> BlockData:
return notion_block_data


@lru_cache
def get_link_rewrite_mapping() -> Mapping[BlockId, BlockId]:
"""Returns a mapping Notion Material -> our slug"""
mapping = Material.objects.all().values_list("page_id", "slug")

return {page_id: uuid_to_id(str(slug)) for page_id, slug in mapping}


def rewrite_prop(prop: TextProperty) -> TextProperty: # NOQA: CCR001
"""Drill down notion property data, searching for a link to the internal material.
If the link is found -- rewrite its id to our material slug
"""
rewritten = TextProperty()
mapping = get_rewrite_mapping()
mapping = get_link_rewrite_mapping()

for value in prop:
if isinstance(value, list):
Expand All @@ -46,3 +46,6 @@ def rewrite_prop(prop: TextProperty) -> TextProperty: # NOQA: CCR001
rewritten.append(value)

return rewritten


__all__ = ["rewrite_links"]
37 changes: 37 additions & 0 deletions src/apps/notion/rewrite/tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import fnmatch

from benedict import benedict

from apps.notion.types import BlockData

TAGS_TO_LIVE = (
"value.id",
"value.type",
"value.content.*",
"value.properties.*",
"value.format.page_icon",
"value.format.page_cover",
"value.format.page_cover_position",
"value.parent_id",
"value.parent_table",
)


def drop_extra_tags(notion_block_data: BlockData) -> BlockData:
data = benedict(notion_block_data)
for key in sorted(data.keypaths(), key=len, reverse=True):
if not tag_should_live(key):
del data[key]

return notion_block_data


def tag_should_live(tag: str) -> bool:
for tag_to_live in TAGS_TO_LIVE:
if tag_to_live.startswith(f"{tag}."):
return True

if fnmatch.fnmatch(tag, tag_to_live) or tag == tag_to_live.replace(".*", ""):
return True

return False
3 changes: 2 additions & 1 deletion src/apps/notion/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ class Properties(TypedDict, total=False):
class BlockValue(TypedDict, total=False):
id: BlockId
content: list[BlockId]
last_edited_time: int
parent_id: str
parent_table: str
properties: Properties
type: Literal[
"page",
Expand Down
3 changes: 2 additions & 1 deletion tests/apps/notion/api/test_notion_cache_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def _ok(respx_mock: MockRouter):
],
},
},
"second-block": {},
"third-block": {},
"fourth-block": {},
},
},
},
Expand Down
4 changes: 2 additions & 2 deletions tests/apps/notion/api/test_notion_material_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def test_both_formats_work_with_slug(api, material_slug, mock_notion_response):
def test_content_is_passed_from_notion_client(api, material):
got = api.get(f"/api/v2/notion/materials/{material.page_id}/")

assert got["block-1"]["role"] == "reader-1"
assert got["block-2"]["role"] == "reader-2"
assert got["block-1"]["value"]["parent_id"] == "100500"
assert got["block-2"]["value"]["parent_id"] == "100600"


def test_404_for_non_existant_materials(api, mock_notion_response):
Expand Down
4 changes: 2 additions & 2 deletions tests/apps/notion/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def page() -> NotionPage:
return NotionPage(
blocks=NotionBlockList(
[
NotionBlock(id="block-1", data={"role": "reader-1", "value": {"last_edited_time": 1642356660000}}),
NotionBlock(id="block-2", data={"role": "reader-2"}),
NotionBlock(id="block-1", data={"role": "reader-1", "value": {"parent_table": "test", "parent_id": "100500"}}),
NotionBlock(id="block-2", data={"value": {"parent_id": "100600"}}),
]
)
)
Expand Down
36 changes: 36 additions & 0 deletions tests/apps/notion/test_drop_extra_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pytest

from apps.notion.rewrite import drop_extra_tags

@pytest.fixture
def block():
return {
"role": "reader",
"value": {
"format": {"page_icon": "1.ico"},
"second_level_value_to_drop": "test",
"second_level_key_to_drop": {
"test": "__expired",
},
"parent_id": "will_no_be_dropped",
"content": {
"random_key": "random_value",
},
},
}


def test_extra_tags_are_dropped(block):
block = drop_extra_tags(block)

assert "role" not in block
assert "second_level_value_to_drop" not in block
assert "second_level_key_to_drop" not in block


def test_required_values_are_not_dropped(block):
block = drop_extra_tags(block)

assert block["value"]["parent_id"] == "will_no_be_dropped"
assert block["value"]["format"]["page_icon"] == "1.ico"
assert block["value"]["content"]["random_key"] == "random_value" # wildcards work wll
Loading

0 comments on commit e91ff69

Please sign in to comment.