Skip to content

Commit

Permalink
Initial save_items() utility plus tests
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Oct 7, 2019
1 parent 5495bca commit 618b05b
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 0 deletions.
63 changes: 63 additions & 0 deletions pocket_to_sqlite/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import datetime
import requests
from sqlite_utils.db import AlterError, ForeignKey


def save_items(items, db):
    """Save Pocket items, their authors and the item/author links to ``db``.

    Every item dict is normalised in place by ``transform`` and has its
    ``"authors"`` entry removed before being stored, so the caller's dicts
    are mutated.  Rows for all three tables are collected in one pass over
    ``items`` and each table is then written once, so the author links of
    every item are kept and no item is written before it was transformed.

    Args:
        items: Iterable of item dicts.  An optional ``"authors"`` entry maps
            to a dict whose values each carry ``author_id``, ``item_id``,
            ``name`` and ``url``.
        db: Object indexable by table name whose tables provide
            ``upsert_all`` (the tests pass a ``sqlite_utils.Database``).
    """
    items_to_save = []
    authors_to_save = []
    items_authors_to_save = []

    for item in items:
        transform(item)
        authors = item.pop("authors", None)
        items_to_save.append(item)
        if not authors:
            continue
        for details in authors.values():
            author_id = int(details["author_id"])
            authors_to_save.append(
                {
                    "author_id": author_id,
                    "name": details["name"],
                    "url": details["url"],
                }
            )
            items_authors_to_save.append(
                {
                    "author_id": author_id,
                    "item_id": int(details["item_id"]),
                }
            )

    # Same write order as before: authors, then items, then the link table
    # that declares foreign keys on both ids.  Empty batches are skipped so
    # upsert_all is never asked to work from zero records.
    if authors_to_save:
        db["authors"].upsert_all(authors_to_save, pk="author_id")
    if items_to_save:
        db["items"].upsert_all(items_to_save, pk="item_id", alter=True)
    if items_authors_to_save:
        db["items_authors"].upsert_all(
            items_authors_to_save,
            pk=("author_id", "item_id"),
            foreign_keys=("author_id", "item_id"),
        )


def transform(item):
    """Normalise the numeric fields of a Pocket item dict in place.

    Each known numeric field that is present is converted with ``int``.
    Fields missing from ``item`` are left absent instead of raising
    ``KeyError``.  ``time_read`` and ``time_favorited`` are then replaced by
    ``None`` when their converted value is falsy (i.e. ``0``).

    Args:
        item: Mutable mapping describing one item; modified in place.

    Returns:
        None.

    Raises:
        ValueError: If a present field cannot be parsed as an integer.
    """
    integer_keys = (
        "item_id",
        "resolved_id",
        "favorite",
        "status",
        "time_added",
        "time_updated",
        "time_read",
        "time_favorited",
        "is_article",
        "is_index",
        "has_video",
        "has_image",
        "word_count",
        "time_to_read",
        "listen_duration_estimate",
    )
    for key in integer_keys:
        if key in item:
            item[key] = int(item[key])

    # A zero timestamp is stored as NULL rather than 0.
    for key in ("time_read", "time_favorited"):
        if key in item and not item[key]:
            item[key] = None


def ensure_fts(db):
    """Enable full-text search on ``items`` unless ``items_fts`` exists.

    When the ``items_fts`` table is not yet listed by ``db.table_names()``,
    FTS is enabled on the ``resolved_title`` and ``excerpt`` columns of the
    ``items`` table with ``create_triggers=True``.
    """
    if "items_fts" in db.table_names():
        return
    searchable_columns = ["resolved_title", "excerpt"]
    db["items"].enable_fts(searchable_columns, create_triggers=True)
61 changes: 61 additions & 0 deletions tests/pocket.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[
{
"item_id": "2746847510",
"resolved_id": "2746847510",
"given_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
"given_title": "Deep Learning: Our Miraculous Year 1990-1991",
"favorite": "0",
"status": "0",
"time_added": "1570303854",
"time_updated": "1570303854",
"time_read": "0",
"time_favorited": "0",
"sort_id": 206,
"resolved_title": "Deep Learning: Our Miraculous Year 1990-1991",
"resolved_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
"excerpt": "The Deep Learning (DL) Neural Networks (NNs) of our team have revolutionised Pattern Recognition and Machine Learning, and are now heavily used in academia and industry [DL4].",
"is_article": "1",
"is_index": "0",
"has_video": "0",
"has_image": "1",
"word_count": "11415",
"lang": "en",
"time_to_read": 52,
"top_image_url": "http://people.idsia.ch/~juergen/miraculous-year754x395.png",
"authors": {
"120590166": {
"item_id": "2746847510",
"author_id": "120590166",
"name": "Link.",
"url": "http://people.idsia.ch/~juergen/heatexchanger/heatexchanger.html"
}
},
"image": {
"item_id": "2746847510",
"src": "http://people.idsia.ch/~juergen/lstmagfa288.gif",
"width": "0",
"height": "0"
},
"images": {
"1": {
"item_id": "2746847510",
"image_id": "1",
"src": "http://people.idsia.ch/~juergen/lstmagfa288.gif",
"width": "0",
"height": "0",
"credit": "",
"caption": ""
},
"2": {
"item_id": "2746847510",
"image_id": "2",
"src": "http://people.idsia.ch/~juergen/deepoverview466x288-6border.gif",
"width": "0",
"height": "0",
"credit": "",
"caption": ""
}
},
"listen_duration_estimate": 4419
}
]
74 changes: 74 additions & 0 deletions tests/test_save_pocket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from pocket_to_sqlite import utils
import pytest
import json
import sqlite_utils
from sqlite_utils.db import ForeignKey
import pathlib


def load():
    """Return the parsed contents of ``pocket.json`` next to this file.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as parsing finishes, and it is read as UTF-8 regardless of the
    platform's default encoding.
    """
    json_path = pathlib.Path(__file__).parent / "pocket.json"
    with open(json_path, "r", encoding="utf-8") as fp:
        return json.load(fp)


@pytest.fixture(scope="session")
def converted():
    """Session-scoped in-memory database populated from the JSON fixture.

    The fixture items are saved with ``utils.save_items`` and full-text
    search is then set up with ``utils.ensure_fts``.
    """
    database = sqlite_utils.Database(":memory:")
    fixture_items = load()
    utils.save_items(fixture_items, database)
    utils.ensure_fts(database)
    return database


def test_tables(converted):
    """The converted database contains exactly the expected set of tables."""
    expected_tables = {
        "items",
        "authors",
        "items_authors",
        "items_fts",
        "items_fts_config",
        "items_fts_data",
        "items_fts_docsize",
        "items_fts_idx",
    }
    actual_tables = set(converted.table_names())
    assert expected_tables == actual_tables


def test_item(converted):
    """The first row of ``items`` matches the transformed fixture item."""
    rows = list(converted["items"].rows)
    item = rows[0]
    page_url = "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html"
    title = "Deep Learning: Our Miraculous Year 1990-1991"
    expected = {
        "item_id": 2746847510,
        "resolved_id": 2746847510,
        "given_url": page_url,
        "given_title": title,
        "favorite": 0,
        "status": 0,
        "time_added": 1570303854,
        "time_updated": 1570303854,
        # Zero timestamps in the fixture come back as NULL.
        "time_read": None,
        "time_favorited": None,
        "sort_id": 206,
        "resolved_title": title,
        "resolved_url": page_url,
        "excerpt": "The Deep Learning (DL) Neural Networks (NNs) of our team have revolutionised Pattern Recognition and Machine Learning, and are now heavily used in academia and industry [DL4].",
        "is_article": 1,
        "is_index": 0,
        "has_video": 0,
        "has_image": 1,
        "word_count": 11415,
        "lang": "en",
        "time_to_read": 52,
        "top_image_url": "http://people.idsia.ch/~juergen/miraculous-year754x395.png",
        # Nested objects are expected back as JSON text.
        "image": '{"item_id": "2746847510", "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif", "width": "0", "height": "0"}',
        "images": '{"1": {"item_id": "2746847510", "image_id": "1", "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif", "width": "0", "height": "0", "credit": "", "caption": ""}, "2": {"item_id": "2746847510", "image_id": "2", "src": "http://people.idsia.ch/~juergen/deepoverview466x288-6border.gif", "width": "0", "height": "0", "credit": "", "caption": ""}}',
        "listen_duration_estimate": 4419,
    }
    assert expected == item


def test_authors(converted):
    """The ``authors`` table holds exactly the single fixture author."""
    expected_author = {
        "author_id": 120590166,
        "name": "Link.",
        "url": "http://people.idsia.ch/~juergen/heatexchanger/heatexchanger.html",
    }
    authors = list(converted["authors"].rows)
    assert [expected_author] == authors

0 comments on commit 618b05b

Please sign in to comment.