From 618b05b44fa526aca7d4a8fcdbb2e7ee7f3456d7 Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sun, 6 Oct 2019 21:50:41 -0700
Subject: [PATCH] Initial save_items() utility plus tests

---
 pocket_to_sqlite/utils.py | 84 +++++++++++++++++++++++++++++++++++++++
 tests/pocket.json         | 61 ++++++++++++++++++++++++++++
 tests/test_save_pocket.py | 81 +++++++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 pocket_to_sqlite/utils.py
 create mode 100644 tests/pocket.json
 create mode 100644 tests/test_save_pocket.py

diff --git a/pocket_to_sqlite/utils.py b/pocket_to_sqlite/utils.py
new file mode 100644
index 0000000..e6bedba
--- /dev/null
+++ b/pocket_to_sqlite/utils.py
@@ -0,0 +1,84 @@
+import datetime
+import requests
+from sqlite_utils.db import AlterError, ForeignKey
+
+
+def save_items(items, db):
+    """Save Pocket items, their authors and item/author links to db.
+
+    Every item is normalised in place with transform() and its nested
+    "authors" mapping is removed before the item is stored. Rows are
+    collected across all items and written once per table, so the
+    outcome does not depend on which item happens to come last.
+
+    items: iterable of item dictionaries (see tests/pocket.json).
+    db: sqlite_utils Database to write to.
+    """
+    items = list(items)
+    authors_to_save = {}
+    items_authors_to_save = []
+    for item in items:
+        transform(item)
+        authors = item.pop("authors", None) or {}
+        for details in authors.values():
+            author_id = int(details["author_id"])
+            # Keyed by author_id so a shared author is written once.
+            authors_to_save[author_id] = {
+                "author_id": author_id,
+                "name": details["name"],
+                "url": details["url"],
+            }
+            items_authors_to_save.append(
+                {"author_id": author_id, "item_id": int(details["item_id"])}
+            )
+    # Empty batches are skipped instead of being handed to upsert_all().
+    if authors_to_save:
+        db["authors"].upsert_all(list(authors_to_save.values()), pk="author_id")
+    if items:
+        db["items"].upsert_all(items, pk="item_id", alter=True)
+    if items_authors_to_save:
+        # Written last so the authors and items tables already exist
+        # when the foreign keys are declared.
+        db["items_authors"].upsert_all(
+            items_authors_to_save,
+            pk=("author_id", "item_id"),
+            foreign_keys=("author_id", "item_id"),
+        )
+
+
+def transform(item):
+    """Cast an item's numeric fields to int, in place.
+
+    Keys that are absent from the item are left alone rather than
+    raising KeyError. A zero "time_read" or "time_favorited" becomes
+    None.
+    """
+    for key in (
+        "item_id",
+        "resolved_id",
+        "favorite",
+        "status",
+        "time_added",
+        "time_updated",
+        "time_read",
+        "time_favorited",
+        "is_article",
+        "is_index",
+        "has_video",
+        "has_image",
+        "word_count",
+        "time_to_read",
+        "listen_duration_estimate",
+    ):
+        if key in item:
+            item[key] = int(item[key])
+
+    for key in ("time_read", "time_favorited"):
+        if key in item and not item[key]:
+            item[key] = None
+
+
+def ensure_fts(db):
+    """Enable full-text search on items unless items_fts already exists."""
+    if "items_fts" not in db.table_names():
+        db["items"].enable_fts(["resolved_title", "excerpt"], create_triggers=True)
diff --git a/tests/pocket.json b/tests/pocket.json
new file mode 100644
index 0000000..9162975
--- /dev/null
+++ b/tests/pocket.json
@@ -0,0 +1,61 @@
+[
+    {
+        "item_id": "2746847510",
+        "resolved_id": "2746847510",
+        "given_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
+        "given_title": "Deep Learning: Our Miraculous Year 1990-1991",
+        "favorite": "0",
+        "status": "0",
+        "time_added": "1570303854",
+        "time_updated": "1570303854",
+        "time_read": "0",
+        "time_favorited": "0",
+        "sort_id": 206,
+        "resolved_title": "Deep Learning: Our Miraculous Year 1990-1991",
+        "resolved_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
+        "excerpt": "The Deep Learning (DL) Neural Networks (NNs) of our team have revolutionised Pattern Recognition and Machine Learning, and are now heavily used in academia and industry [DL4].",
+        "is_article": "1",
+        "is_index": "0",
+        "has_video": "0",
+        "has_image": "1",
+        "word_count": "11415",
+        "lang": "en",
+        "time_to_read": 52,
+        "top_image_url": "http://people.idsia.ch/~juergen/miraculous-year754x395.png",
+        "authors": {
+            "120590166": {
+                "item_id": "2746847510",
+                "author_id": "120590166",
+                "name": "Link.",
+                "url": "http://people.idsia.ch/~juergen/heatexchanger/heatexchanger.html"
+            }
+        },
+        "image": {
+            "item_id": "2746847510",
+            "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif",
+            "width": "0",
+            "height": "0"
+        },
+        "images": {
+            "1": {
+                "item_id": "2746847510",
+                "image_id": "1",
+                "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif",
+                "width": "0",
+                "height": "0",
+                "credit": "",
+                "caption": ""
+            },
+            "2": {
+                "item_id": "2746847510",
+                "image_id": "2",
+                "src": "http://people.idsia.ch/~juergen/deepoverview466x288-6border.gif",
+                "width": "0",
+                "height": "0",
+                "credit": "",
+                "caption": ""
+            }
+        },
+        "listen_duration_estimate": 4419
+    }
+]
diff --git a/tests/test_save_pocket.py b/tests/test_save_pocket.py
new file mode 100644
index 0000000..6af0b8d
--- /dev/null
+++ b/tests/test_save_pocket.py
@@ -0,0 +1,81 @@
+from pocket_to_sqlite import utils
+import pytest
+import json
+import sqlite_utils
+from sqlite_utils.db import ForeignKey
+import pathlib
+
+
+def load():
+    json_path = pathlib.Path(__file__).parent / "pocket.json"
+    with open(json_path, "r") as fp:
+        return json.load(fp)
+
+
+@pytest.fixture(scope="session")
+def converted():
+    db = sqlite_utils.Database(":memory:")
+    utils.save_items(load(), db)
+    utils.ensure_fts(db)
+    return db
+
+
+def test_tables(converted):
+    assert {
+        "items_authors",
+        "items_fts",
+        "authors",
+        "items",
+        "items_fts_config",
+        "items_fts_idx",
+        "items_fts_data",
+        "items_fts_docsize",
+    } == set(converted.table_names())
+
+
+def test_item(converted):
+    item = list(converted["items"].rows)[0]
+    assert {
+        "item_id": 2746847510,
+        "resolved_id": 2746847510,
+        "given_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
+        "given_title": "Deep Learning: Our Miraculous Year 1990-1991",
+        "favorite": 0,
+        "status": 0,
+        "time_added": 1570303854,
+        "time_updated": 1570303854,
+        "time_read": None,
+        "time_favorited": None,
+        "sort_id": 206,
+        "resolved_title": "Deep Learning: Our Miraculous Year 1990-1991",
+        "resolved_url": "http://people.idsia.ch/~juergen/deep-learning-miraculous-year-1990-1991.html",
+        "excerpt": "The Deep Learning (DL) Neural Networks (NNs) of our team have revolutionised Pattern Recognition and Machine Learning, and are now heavily used in academia and industry [DL4].",
+        "is_article": 1,
+        "is_index": 0,
+        "has_video": 0,
+        "has_image": 1,
+        "word_count": 11415,
+        "lang": "en",
+        "time_to_read": 52,
+        "top_image_url": "http://people.idsia.ch/~juergen/miraculous-year754x395.png",
+        "image": '{"item_id": "2746847510", "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif", "width": "0", "height": "0"}',
+        "images": '{"1": {"item_id": "2746847510", "image_id": "1", "src": "http://people.idsia.ch/~juergen/lstmagfa288.gif", "width": "0", "height": "0", "credit": "", "caption": ""}, "2": {"item_id": "2746847510", "image_id": "2", "src": "http://people.idsia.ch/~juergen/deepoverview466x288-6border.gif", "width": "0", "height": "0", "credit": "", "caption": ""}}',
+        "listen_duration_estimate": 4419,
+    } == item
+
+
+def test_authors(converted):
+    authors = list(converted["authors"].rows)
+    assert [
+        {
+            "author_id": 120590166,
+            "name": "Link.",
+            "url": "http://people.idsia.ch/~juergen/heatexchanger/heatexchanger.html",
+        }
+    ] == authors
+
+
+def test_transform_missing_keys():
+    item = {"item_id": "5", "status": "2", "time_read": "0"}
+    utils.transform(item)
+    assert {"item_id": 5, "status": 2, "time_read": None} == item