diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..88ff976
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,20 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+[*.py]
+charset = utf-8
+
+[*.py]
+indent_style = space
+indent_size = 4
+
+[*.css]
+indent_style = space
+indent_size = 2
+
+[.travis.yml]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d2b24e8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+# packages
+venv
+
+# python
+.eggs
+*.egg-info
+
+# cache
+__pycache__
+*.pyc
+tmp
+
+# build
+*.epub
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f45c27e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2019 Jacob Budin
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..56f2fba
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# Portable Wisdom
+
+Portable Wisdom is a tool to generate EPUB files from [Instapaper](https://www.instapaper.com/). You can then choose to sync these files to your ereader.
+
+## Technologies
+
+- Python 3
+
+## Features
+
+- Retrieves unread articles from Instapaper
+- Embeds web images, downsizes them, and converts them to greyscale
+- Caches articles and images
+- Creates well-formatted EPUB files, tailored for your ereader
+
+## Quick Start
+
+1. Download and install Portable Wisdom from PyPI:
+
+       $ pip install portable_wisdom
+
+2. [Request an Instapaper API key.](https://www.instapaper.com/main/request_oauth_consumer_token) (Or copy one from a friend.)
+3. Run Portable Wisdom from the command line:
+
+       $ portable-wisdom --instapaper-key KEY \
+                         --instapaper-secret SECRET \
+                         --instapaper-login USER \
+                         --instapaper-password PASS
+
+On success, the script will print the output filename. For all the options, run `$ portable-wisdom -h`.
+
+## License
+
+MIT License
diff --git a/portable_wisdom/__init__.py b/portable_wisdom/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/portable_wisdom/article.py b/portable_wisdom/article.py
new file mode 100644
index 0000000..23d6e3b
--- /dev/null
+++ b/portable_wisdom/article.py
@@ -0,0 +1,24 @@
+class Article:
+    """A single article: a title plus its body content.
+
+    Attributes:
+        title: Headline of the article.
+        content: Body of the article, stored exactly as given.
+    """
+
+    def __init__(self, title, content):
+        """Store `title` and `content` unchanged on the instance."""
+        self.title = title
+        self.content = content
+
+    def __repr__(self):
+        """Return a constructor-style representation of the article.
+
+        Both fields are rendered with `%r`, so quotes and backslashes
+        inside them are escaped rather than corrupting the output.
+        """
+        return 'Article(%r, %r)' % (self.title, self.content)
+
+    def __str__(self):
+        """Return the title wrapped in double quotes."""
+        return '"%s"' % self.title
diff --git a/portable_wisdom/cache.py b/portable_wisdom/cache.py
new file mode 100644
index 0000000..bb0c862
--- /dev/null
+++ b/portable_wisdom/cache.py
@@ -0,0 +1,58 @@
+from diskcache import Cache as DiskCache
+import logging
+import os
+
+# On-disk cache lives in a `tmp` directory next to this module
+CACHE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'tmp')
+CACHE_SIZE = 64 * 1000000  # 64 MB
+
+# Sentinel that tells "key absent" apart from a stored falsy value
+_MISSING = object()
+
+
+class Cache:
+    """Thin wrapper around `diskcache.Cache` that logs cache hits."""
+
+    def __init__(self):
+        """Create the backing store at `CACHE_PATH`, limited to `CACHE_SIZE`."""
+        self.cache = DiskCache(CACHE_PATH, size_limit=CACHE_SIZE)
+
+    def get(self, key):
+        """Return the value stored under `key`, or `None` when absent."""
+        value = self.cache.get(key, default=_MISSING)
+
+        if value is _MISSING:
+            return None
+
+        # Log every hit, including stored falsy values such as `0` or `b''`
+        logging.debug('Hit cache key %s', key)
+        return value
+
+    def clear(self):
+        """Remove all entries; returns the backing store's `clear()` result."""
+        return self.cache.clear()
+
+    def set(self, key, value):
+        """Store `value` under `key`; returns the backing store's `set()` result."""
+        return self.cache.set(key, value)
+
+    def get_or(self, key, _or):
+        """Get a key's value, or use function's return value to set
+
+        `_or` is a zero-argument callable invoked only on a miss; its result
+        is stored under `key` and returned.
+        """
+        # One lookup instead of `in` followed by `[]`: the entry could
+        # disappear between the two and raise `KeyError`
+        value = self.cache.get(key, default=_MISSING)
+
+        if value is not _MISSING:
+            logging.debug('Hit cache key %s', key)
+            return value
+
+        value = _or()
+        self.cache.set(key, value)
+        return value
+
+
+cache = Cache()
diff --git a/portable_wisdom/config.py b/portable_wisdom/config.py
new file mode 100644
index 0000000..e11789d
--- /dev/null
+++ b/portable_wisdom/config.py
@@ -0,0 +1,26 @@
+# HTML elements that are preserved
+ALLOWED_TAGS = ('p', 'b', 'i', 'blockquote', 'strong', 'em', 'figure', 'figcaption', 'img')
+
+# HTML element attributes that are preserved
+ALLOWED_ATTRIBUTES = ('src', )
+
+# Instapaper API credentials and account login (each can be overridden from the CLI)
+INSTAPAPER_KEY = ''
+INSTAPAPER_SECRET = ''
+INSTAPAPER_LOGIN = ''
+INSTAPAPER_PASSWORD = ''
+
+# Maximum number of articles to include (the Instapaper source never requests more than 25)
+ARTICLE_LIMIT = 25
+
+# Maximum dimensions of embedded images; larger images are downsized to fit
+IMAGE_MAX_SIZE = (600, 600)
+
+# Name of stylesheet to use; resolved to a `.css` file in the package's `styles` directory
+STYLE = 'nook-glowlight-3'
+
+# Debug mode (turns on DEBUG-level logging in the CLI)
+DEBUG = False
+
+# Verbose mode (currently also turns on DEBUG-level logging in the CLI)
+VERBOSE = False
diff --git a/portable_wisdom/epub.py b/portable_wisdom/epub.py
new file mode 100644
index 0000000..bf4e61e
--- /dev/null
+++ b/portable_wisdom/epub.py
@@ -0,0 +1,107 @@
+from bs4 import BeautifulSoup
+from .cache import cache
+from urllib.parse import urlparse
+import os
+from ebooklib import epub
+import io
+import logging
+import requests
+from PIL import Image
+from .config import *
+
+# Seconds to wait on a remote image server before giving up on the image
+_DOWNLOAD_TIMEOUT = 30
+
+# `src` prefixes that cannot be downloaded
+_JUNK_PREFIXES = ('denied:', 'data:')
+
+
+def _fetch_thumbnail(src, as_png):
+    """Return downsized greyscale image bytes for `src`, or `None` on failure.
+
+    Thumbnails are cached under `src` plus `IMAGE_MAX_SIZE`. `as_png` selects
+    PNG output; otherwise the thumbnail is written as JPEG.
+    """
+    cache_key = src + str(IMAGE_MAX_SIZE)
+    thumbnail_bytes = cache.get(cache_key)
+    if thumbnail_bytes:
+        return thumbnail_bytes
+
+    # Download the image; any `requests` failure (malformed URL, timeout,
+    # connection or decoding error) skips this image instead of aborting
+    try:
+        logging.info('Downloading image %s', src)
+        content = requests.get(src, timeout=_DOWNLOAD_TIMEOUT).content
+    except requests.exceptions.RequestException as e:
+        logging.error('Skipping image %s (%s)', src, e)
+        return None
+
+    thumbnail = io.BytesIO()
+    try:
+        # Create smaller, greyscale image from source image; convert to
+        # `RGBA` before `L` or Pillow will complain
+        im = Image.open(io.BytesIO(content)).convert('RGBA')
+        im.thumbnail(IMAGE_MAX_SIZE)
+        im = im.convert('L')
+        im.save(thumbnail, 'png' if as_png else 'jpeg')
+    except OSError as e:
+        logging.error('Skipping image %s (%s)', src, e)
+        return None
+
+    thumbnail_bytes = thumbnail.getvalue()
+    cache.set(cache_key, thumbnail_bytes)
+    return thumbnail_bytes
+
+
+def embed_images(book):
+    """Embeds remote images in EPUB HTML chapters
+
+    Each `img` in every `EpubHtml` item of `book` is either removed (no
+    usable `src`), left untouched (download or conversion failed), or
+    re-pointed at a thumbnail that is added to `book`. Works in place and
+    returns `None`.
+    """
+    image_names = set()
+
+    # Iterate over a copy, since `book.add_item` below may grow `book.items`
+    for item in list(book.items):
+        # Exact type check: instances of `EpubHtml` subclasses are skipped
+        if type(item) is not epub.EpubHtml:
+            continue
+
+        # Parse HTML, find `img` elements
+        # NOTE(review): the wrapper markup of this literal was lost in the
+        # reviewed copy; `<body>` is a reconstruction -- confirm upstream
+        soup = BeautifulSoup('<body>%s</body>' % item.content, 'html5lib')
+
+        for img in soup.find_all('img'):
+            src = img.get('src')
+
+            # Remove junk images
+            if not src or src.startswith(_JUNK_PREFIXES):
+                img.decompose()
+                continue
+
+            # Thumbnails are only ever PNG or JPEG, so name the file after
+            # the format actually written rather than the remote extension
+            ext = os.path.splitext(urlparse(src).path)[1]
+            as_png = ext == '.png'
+            name = str(hash(src)) + ('.png' if as_png else '.jpg')
+
+            if name not in image_names:
+                thumbnail_bytes = _fetch_thumbnail(src, as_png)
+                if thumbnail_bytes is None:
+                    continue
+
+                # Create `EpubImage` wrapper object
+                image = epub.EpubImage()
+                image.id = str(hash(src))
+                image.file_name = name
+                image.content = thumbnail_bytes
+                book.add_item(image)
+                image_names.add(name)
+
+            img['style'] = 'max-width: 100%'
+            img['src'] = name
+
+        item.content = str(soup.body)
diff --git a/portable_wisdom/source.py b/portable_wisdom/source.py
new file mode 100644
index 0000000..c518260
--- /dev/null
+++ b/portable_wisdom/source.py
@@ -0,0 +1,66 @@
+import os.path
+from .config import *
+from ebooklib import epub
+import logging
+
+STYLES_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'styles')
+
+
+class Source:
+    """Base class for article sources that can be rendered as an EPUB.
+
+    Subclasses provide a `name` class attribute, used as the book title,
+    and implement `get_articles`.
+    """
+
+    def get_articles(self):
+        """Return an iterable of objects exposing `title` and `content`."""
+        raise NotImplementedError(
+            '%s must implement get_articles' % self.__class__.__name__)
+
+    def to_epub(self, style=None):
+        """Generate `EpubBook` from result of `get_articles`
+
+        `style` names a stylesheet in `STYLES_PATH` (the `.css` suffix is
+        optional); when omitted, the configured `STYLE` is used.
+        """
+        if not style:
+            style = STYLE
+
+        logging.info('Creating book using %s style', style)
+        articles = self.get_articles()
+
+        book = epub.EpubBook()
+        book.set_title(self.__class__.name)
+
+        # Create HTML file for each article
+        # NOTE(review): the markup inside these literals was lost in the
+        # reviewed copy; the tags below are a reconstruction -- confirm upstream
+        chapters = []
+        for i, article in enumerate(articles):
+            chapter = epub.EpubHtml(uid=str(i), title=article.title, file_name=('%d.xhtml' % i))
+            chapter.content = '<html><body>' + ('<h1>%s</h1>' % article.title) + article.content + '</body></html>'
+            chapters.append(chapter)
+            book.add_item(chapter)
+
+        # Add generic book metadata; the table of contents is a real list
+        # because a `map` iterator is empty after its first traversal
+        book.toc = [epub.Link(c.get_name(), c.title, str(c.get_id())) for c in chapters]
+        book.add_item(epub.EpubNcx())
+        book.add_item(epub.EpubNav())
+
+        # Add stylesheet
+        # NOTE(review): the stylesheet is added to the book but no chapter
+        # links to it here -- confirm that is intended
+        if not style.endswith('.css'):
+            style = style + '.css'
+
+        style_path = os.path.join(STYLES_PATH, style)
+        with open(style_path, encoding='utf-8') as f:
+            nav_css = epub.EpubItem(uid="style_nav", file_name="style/default.css", media_type="text/css", content=f.read())
+
+        book.add_item(nav_css)
+
+        book.spine = ['nav'] + chapters
+
+        return book
diff --git a/portable_wisdom/sources/__init__.py b/portable_wisdom/sources/__init__.py
new file mode 100644
index 0000000..3838e99
--- /dev/null
+++ b/portable_wisdom/sources/__init__.py
@@ -0,0 +1 @@
+from .instapaper import Instapaper
diff --git a/portable_wisdom/sources/instapaper.py b/portable_wisdom/sources/instapaper.py
new file mode 100644
index 0000000..9374319
--- /dev/null
+++ b/portable_wisdom/sources/instapaper.py
@@ -0,0 +1,37 @@
+import logging
+import os.path
+from ..cache import cache
+from ..article import Article
+from ..config import *
+from ..source import Source
+from pyinstapaper.instapaper import Instapaper as PInstapaper
+
+
+class Instapaper(Source):
+    """Source backed by the unread bookmarks of an Instapaper account."""
+
+    name = 'Instapaper'
+
+    def get_articles(self):
+        """Produce a list of Articles
+
+        Logs in with the configured credentials and requests at most 25
+        unread bookmarks, fewer when `ARTICLE_LIMIT` is lower. Bookmark text
+        is cached under the bookmark's `hash`.
+        """
+        client = PInstapaper(INSTAPAPER_KEY, INSTAPAPER_SECRET)
+        client.login(INSTAPAPER_LOGIN, INSTAPAPER_PASSWORD)
+
+        # A falsy `ARTICLE_LIMIT` leaves the 25 article maximum in force
+        limit = min(25, ARTICLE_LIMIT) if ARTICLE_LIMIT else 25
+
+        def text_of(bookmark):
+            # Only fetched from Instapaper when the cache has no entry
+            return cache.get_or(bookmark.hash,
+                                lambda: bookmark.get_text()['data'].decode())
+
+        articles = [Article(content=text_of(bookmark), title=bookmark.title)
+                    for bookmark in client.get_bookmarks('unread', limit)]
+
+        logging.info('Retrieved %d articles' % len(articles))
+        return articles
diff --git a/portable_wisdom/styles/nook-glowlight-3.css b/portable_wisdom/styles/nook-glowlight-3.css
new file mode 100644
index 0000000..bf20832
--- /dev/null
+++ b/portable_wisdom/styles/nook-glowlight-3.css
@@ -0,0 +1,65 @@
+/* General: paragraphs get vertical spacing and a first-line indent */
+p {
+ margin: 1em 0;
+ text-indent: 1.5em;
+}
+
+/* Headers: hyphenation disabled; font size steps down from h1 to h4 */
+h1, h2, h3, h4, h5, h6 {
+ hyphens: none;
+}
+h1 {
+ font-size: 1.4em;
+ margin: 0 2.5em 2.5em 2.5em;
+ text-align: center;
+ font-weight: bold;
+}
+h2 {
+ font-size: 1.25em;
+ font-weight: bold;
+}
+h3 {
+ font-size: 1.1em;
+ font-weight: bold;
+}
+h4 {
+ font-size: 1em;
+ font-weight: bold;
+}
+h5 {
+ font-size: 1em;
+ font-weight: normal;
+}
+h6 {
+ font-size: 1em;
+ font-weight: normal;
+}
+
+/* Quotes: indented italics; paragraphs inside a blockquote lose the first-line indent */
+blockquote, aside {
+ margin: 1em 1.5em;
+ font-style: italic;
+}
+blockquote p {
+ text-indent: 0;
+}
+blockquote p:first-child {
+ margin-top: 0;
+}
+blockquote p:last-child {
+ margin-bottom: 0;
+}
+
+/* Images and their captions */
+figcaption {
+ margin: 0 0 1em 0;
+}
+img {
+ text-align: center;
+}
+
+/* Inline: links are always black and underlined */
+a {
+ color: black !important;
+ text-decoration: underline;
+}
diff --git a/portable_wisdom/tests/__init__.py b/portable_wisdom/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/portable_wisdom/tests/test_article.py b/portable_wisdom/tests/test_article.py
new file mode 100644
index 0000000..940ac58
--- /dev/null
+++ b/portable_wisdom/tests/test_article.py
@@ -0,0 +1,24 @@
+from unittest import TestCase
+from portable_wisdom.article import Article
+
+title = 'Bank Robber Strikes Again'
+content = '$1m was stolen from First Federal Bank yesterday.'
+
+
+class TestArticle(TestCase):
+    """Checks `Article` construction and string conversion."""
+
+    def setUp(self):
+        # Fresh article for every test
+        self.article = Article(title, content)
+
+    def test_init(self):
+        # Constructor arguments are exposed as attributes
+        self.assertEqual(self.article.title, title)
+        self.assertEqual(self.article.content, content)
+
+    def test_str(self):
+        # `str()` shows the title but not the content
+        text = str(self.article)
+        self.assertIn(title, text)
+        self.assertNotIn(content, text)
diff --git a/portable_wisdom/tests/test_cache.py b/portable_wisdom/tests/test_cache.py
new file mode 100644
index 0000000..2c967e7
--- /dev/null
+++ b/portable_wisdom/tests/test_cache.py
@@ -0,0 +1,39 @@
+from unittest import TestCase
+from random import choice
+from portable_wisdom.cache import Cache
+
+
+class TestCache(TestCase):
+    """Exercises `Cache` against its real on-disk store."""
+
+    def setUp(self):
+        self.cache = Cache()
+        # Start empty so a stale `some_key` from an earlier run cannot leak in
+        self.cache.clear()
+        # Registered up front so the store is emptied even when a test fails
+        self.addCleanup(self.cache.clear)
+        self.value = choice(range(1, 255))
+
+    def test_get(self):
+        self.cache.set('some_key', self.value)
+        self.assertEqual(self.cache.get('some_key'), self.value)
+
+    def test_get_empty(self):
+        self.assertIsNone(self.cache.get('some_absent_key'))
+
+    def test_get_or(self):
+        # On a miss the fallback's value is both returned and stored
+        result = self.cache.get_or('some_key', lambda: self.value)
+        self.assertEqual(result, self.value)
+        self.assertEqual(self.cache.get('some_key'), self.value)
+
+    def test_get_or2(self):
+        # On a hit the fallback is ignored and the stored value wins
+        self.cache.set('some_key', self.value)
+        result = self.cache.get_or('some_key', lambda: self.value + 1)
+        self.assertEqual(result, self.value)
+        self.assertEqual(self.cache.get('some_key'), self.value)
+
+    def test_set(self):
+        self.cache.set('some_key', self.value)
+        self.assertEqual(self.cache.get('some_key'), self.value)
diff --git a/portable_wisdom/tests/test_source.py b/portable_wisdom/tests/test_source.py
new file mode 100644
index 0000000..619ec99
--- /dev/null
+++ b/portable_wisdom/tests/test_source.py
@@ -0,0 +1,28 @@
+from ebooklib import epub
+from unittest import TestCase
+from portable_wisdom.source import Source
+from portable_wisdom.article import Article
+
+articles = (
+    Article('Bank Robber Strikes Again', '$1m was stolen from First Federal Bank yesterday.'),
+    Article('UFO Lands in Area 51', 'Aliens greeted humans with peace sign.'),
+)
+
+
+class FakeSource(Source):
+    """Source stub that serves the canned `articles` above."""
+
+    name = 'Lies Magazine'
+
+    def get_articles(self):
+        return articles
+
+
+class TestSource(TestCase):
+    """Checks the EPUB produced by `Source.to_epub`."""
+
+    def test_to_epub(self):
+        # One `EpubHtml` item (exact type) is expected per article
+        book = FakeSource().to_epub()
+        chapters = sum(1 for item in book.items if type(item) is epub.EpubHtml)
+        self.assertEqual(len(articles), chapters)
diff --git a/portable_wisdom/tests/test_wisdom.py b/portable_wisdom/tests/test_wisdom.py
new file mode 100644
index 0000000..e69de29
diff --git a/portable_wisdom/tmp/.keep b/portable_wisdom/tmp/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/portable_wisdom/wisdom.py b/portable_wisdom/wisdom.py
new file mode 100644
index 0000000..d47da1c
--- /dev/null
+++ b/portable_wisdom/wisdom.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+#
+# Generate EPUB from Instapaper
+
+import argparse
+import datetime
+from ebooklib import epub
+import logging
+from . import config
+
+# Settings that must be non-empty before Instapaper can be queried
+_REQUIRED_SETTINGS = (
+    'INSTAPAPER_KEY',
+    'INSTAPAPER_SECRET',
+    'INSTAPAPER_LOGIN',
+    'INSTAPAPER_PASSWORD',
+)
+
+
+def main():
+    """Generate EPUB from Instapaper
+
+    Parses the command line, copies every option that was given onto
+    `config`, writes the EPUB into the current directory and prints its
+    filename. Exits with a usage error when credentials are missing.
+    """
+    # Support CLI
+    parser = argparse.ArgumentParser(description='Generate EPUB from Instapaper')
+    parser.add_argument('--instapaper-key', help='Instapaper API key')
+    parser.add_argument('--instapaper-secret', help='Instapaper API secret')
+    parser.add_argument('--instapaper-login', help='Instapaper account username or email address')
+    parser.add_argument('--instapaper-password', help='Instapaper account password')
+    parser.add_argument('-s', '--style', default=config.STYLE, help='stylesheet to use')
+    parser.add_argument('-l', '--article-limit', '--limit', default=config.ARTICLE_LIMIT, metavar='LIMIT', type=int, help='number of articles to include')
+    parser.add_argument('-v', '--verbose', default=False, action='store_true', help='verbose mode')
+    parser.add_argument('-d', '--debug', default=False, action='store_true', help='debug mode')
+
+    args = parser.parse_args()
+
+    # Where an option is provided, override its configuration value
+    for option, value in vars(args).items():
+        if value:
+            setattr(config, option.upper(), value)
+
+    # Fail early with a usage error rather than an opaque API failure later
+    missing = [
+        '--' + name.lower().replace('_', '-')
+        for name in _REQUIRED_SETTINGS if not getattr(config, name)
+    ]
+    if missing:
+        parser.error('missing Instapaper settings: %s' % ', '.join(missing))
+
+    # `--debug` logs everything; `--verbose` logs progress messages only
+    if config.DEBUG:
+        logging_level = logging.DEBUG
+    elif config.VERBOSE:
+        logging_level = logging.INFO
+    else:
+        logging_level = logging.CRITICAL
+
+    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_level)
+
+    # Import after configuration is set: these modules copy `config`
+    # values via `from .config import *` when first imported
+    from .epub import embed_images
+    from .sources import Instapaper
+
+    # Create EPUB and save to disk
+    source = Instapaper
+    book = source().to_epub()
+    embed_images(book)
+
+    today = datetime.datetime.today()
+    filename = '%s - %s-%s-%s.epub' % (source.name, today.year, today.month, today.day)
+    epub.write_epub(filename, book, {})
+    print(filename)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..b1868b2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,44 @@
+from setuptools import find_packages, setup
+
+with open('README.md') as f:
+ readme = f.read()
+
+setup(name='portable_wisdom',
+ version='0.1',
+ description='Generate EPUB from Instapaper',
+ long_description=readme,
+ long_description_content_type='text/markdown',
+ classifiers=[
+ 'Development Status :: 3 - Alpha',
+ 'Environment :: Console',
+ 'Intended Audience :: End Users/Desktop',
+ 'License :: OSI Approved :: MIT License',
+ 'Natural Language :: English',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.7',
+ 'Topic :: Utilities',
+ ],
+ url='https://github.com/jacobbudin/portable_wisdom',
+ author='Jacob Budin',
+ author_email='self@jacobbudin.com',
+ license='MIT',
+ install_requires=[
+ 'ebooklib>=0.17', # generates EPUB
+ 'pillow>=6.0', # downsizes images
+ 'requests>=2.21', # downloads images
+ 'beautifulsoup4>=4.7.1', # parses HTML for images
+ 'html5lib>=1.0.1', # parses HTML for images
+ 'diskcache>=3.1.1', # caches article text and images
+ 'pyinstapaper>=0.2.2', # Instapaper API client
+ ],
+ packages=find_packages(),
+ package_data={
+ 'portable_wisdom': ['styles/*.css'],
+ },
+ entry_points={
+ 'console_scripts': ['portable-wisdom=portable_wisdom.wisdom:main'],
+ },
+ test_suite='nose.collector',
+ tests_require=['nose'],
+ zip_safe=False)