Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New try at HTML/PDF export #16

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
**.DS_Store
.gitignore

frontend/node_modules
frontend/node_modules
frontend/build
attachments
__pycache__
backend/env
5 changes: 3 additions & 2 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ FROM tiangolo/uwsgi-nginx:python3.6
RUN apt-get update && \
apt-get install -y \
libldap2-dev \
libsasl2-dev
libsasl2-dev \
wkhtmltopdf

RUN mkdir /app/elogy

Expand All @@ -20,4 +21,4 @@ RUN mkdir /var/elogy/attachments

EXPOSE 80

ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app"]
ENTRYPOINT ["uwsgi", "--socket=0.0.0.0:80", "--protocol=http", "--file=run.py", "--callable=app", "--wsgi-disable-file-wrapper"]
3 changes: 1 addition & 2 deletions backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@ install:
env/bin/pip install -e .[dev]

run:
FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888

FLASK_APP=backend.app ELOGY_CONFIG_FILE=../config.py env/bin/flask run -p 8888 --reload --debugger
48 changes: 28 additions & 20 deletions backend/backend/api/entries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import logging
import io

from flask import request, send_file
from flask_restful import Resource, marshal, marshal_with, abort
Expand All @@ -8,7 +8,7 @@

from ..db import Entry, Logbook, EntryLock
from ..attachments import handle_img_tags
from ..export import export_entries_as_pdf
from ..export import export_entries
from ..actions import new_entry, edit_entry
from . import fields, send_signal

Expand Down Expand Up @@ -141,9 +141,10 @@ def put(self, args, entry_id, logbook_id=None):
"archived": Boolean(),
"ignore_children": Boolean(),
"n": Integer(missing=50),
"offset": Integer(),
"offset": Integer(missing=0),
"download": Str(),
"sort_by_timestamp": Boolean(missing=True),
"reverse_order": Boolean(missing=True)
}


Expand All @@ -159,6 +160,9 @@ def get(self, args, logbook_id=None):
metadata = [meta.split(":")
for meta in args.get("metadata", [])]

n_entries = args["n"]
offset = args["offset"]

if logbook_id:
# restrict search to the given logbook and its descendants
logbook = Logbook.get(Logbook.id == logbook_id)
Expand All @@ -169,8 +173,9 @@ def get(self, args, logbook_id=None):
attachment_filter=args.get("attachments"),
attribute_filter=attributes,
metadata_filter=metadata,
n=args["n"], offset=args.get("offset"),
sort_by_timestamp=args.get("sort_by_timestamp"))
n=n_entries, offset=offset,
sort_by_timestamp=args.get("sort_by_timestamp"),
reverse_order=args.get("reverse_order"))
entries = logbook.get_entries(**search_args)
else:
# global search (all logbooks)
Expand All @@ -182,23 +187,26 @@ def get(self, args, logbook_id=None):
attachment_filter=args.get("attachments"),
attribute_filter=attributes,
metadata_filter=metadata,
n=args["n"], offset=args.get("offset"),
sort_by_timestamp=args.get("sort_by_timestamp"))
n=n_entries, offset=offset,
sort_by_timestamp=args.get("sort_by_timestamp"),
reverse_order=args.get("reverse_order"))
entries = Entry.search(**search_args)

if args.get("download") == "pdf":
# return a PDF version
# TODO: not sure if this belongs in the API
pdf = export_entries_as_pdf(logbook, entries)
if pdf is None:
abort(400, message="Could not create PDF!")
return send_file(pdf, mimetype="application/pdf",
as_attachment=True,
attachment_filename=("{logbook.name}.pdf"
.format(logbook=logbook)))

return marshal(dict(logbook=logbook,
entries=list(entries)), fields.entries)
if args.get("download"):
# Allow getting the entries as one big file (html or pdf), to go.
try:
data, real_n_entries, mimetype, filename = export_entries(
logbook, entries, n_entries, offset, args["download"])
except ValueError:
abort(400)
buf = io.BytesIO()
buf.write(data)
buf.seek(0)
return send_file(buf, mimetype=mimetype, as_attachment=True,
attachment_filename=filename)

return marshal(dict(logbook=logbook, entries=list(entries)),
fields.entries)


class EntryLockResource(Resource):
Expand Down
5 changes: 2 additions & 3 deletions backend/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@

from time import time

from flask import Flask, current_app, send_from_directory, g, request
from flask import Flask, current_app, send_from_directory, g
from flask_restful import Api
import logging

from .api.errors import errors as api_errors
from .api.logbooks import LogbooksResource, LogbookChangesResource
Expand All @@ -23,7 +22,7 @@
static_folder="frontend/build/static",
static_url_path="/static")
app.config.from_envvar('ELOGY_CONFIG_FILE')

app.root_path = app.config["ROOT_PATH"]
app.secret_key = app.config["SECRET"]

# add some hooks for debugging purposes
Expand Down
47 changes: 43 additions & 4 deletions backend/backend/attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,14 @@
original files, in a configurable location on disk.
"""

from base64 import decodestring
from base64 import decodestring, encodestring
import binascii
from datetime import datetime
from dateutil.parser import parse
import io
import mimetypes
import os

from flask import (Blueprint, abort, request, url_for, redirect,
current_app, jsonify, send_from_directory)
from flask import url_for, current_app
from lxml import html, etree
from lxml.html.clean import Cleaner
from PIL import Image
Expand All @@ -39,6 +37,19 @@ def get_content_type(file_):
return type_


def load_attachment(path):
    """
    Read an attachment from the upload folder and return its raw bytes.

    `path` is interpreted relative to the configured "UPLOAD_FOLDER".
    Since it may originate from user supplied HTML (e.g. the "src" of an
    <img> tag), it is normalized and checked to still be located inside
    the upload folder before anything is opened. Otherwise an absolute
    path, or ".." components, could be used to read arbitrary files.

    Raises IOError if the path points outside the upload folder, or if
    the file can't be opened.
    """
    upload_dir = os.path.abspath(current_app.config["UPLOAD_FOLDER"])
    full_path = os.path.abspath(os.path.join(upload_dir, path))
    try:
        inside = os.path.commonpath([upload_dir, full_path]) == upload_dir
    except ValueError:
        # commonpath refuses to compare e.g. paths on different drives
        inside = False
    if not inside:
        raise IOError(
            "Attachment path is outside the upload folder: {!r}".format(path))
    with open(full_path, "rb") as f:
        return f.read()


def get_full_attachment_path(path):
    """Return `path` joined onto the configured "UPLOAD_FOLDER"."""
    base_dir = os.path.join(current_app.config["UPLOAD_FOLDER"])
    return os.path.join(base_dir, path)


def save_attachment(file_, timestamp, entry_id, metadata=None, embedded=False):
"Store an attachment in the proper place"
# make up a path and unique filename using the timestamp
Expand Down Expand Up @@ -188,3 +199,31 @@ def handle_img_tags(text, entry_id=None, timestamp=None):
return content, attachments


def embed_attachments(text, entry_id=None, timestamp=None):
    """
    Embed inlined images in HTML, for export.

    Every element with a "src" attribute that points at a local
    attachment ("/attachments/...") and whose guessed MIME type is an
    image, gets its "src" replaced by a base64 "data:" URI holding the
    file contents. Other sources (e.g. external links), non-image
    attachments and attachments that can't be loaded are left untouched.

    Returns the serialized document, or `text` unchanged if it can't be
    parsed. `entry_id` and `timestamp` are accepted but not used here.
    """
    # TODO I guess we could embed some other types too, e.g. text files..?

    # Imported locally: b64encode replaces base64.encodestring, which is
    # deprecated (removed in Python 3.9) and inserts newlines in its output.
    from base64 import b64encode

    try:
        doc = html.document_fromstring(text)
    except etree.ParserError:
        return text

    prefix = "/attachments/"  # TODO This is a bit crude
    for element in doc.xpath("//*[@src]"):
        # Drop any query string before looking at the path
        src = element.attrib["src"].split("?", 1)[0]
        if not src.startswith(prefix):
            # Not an attachment; e.g. an external link.
            continue
        mimetype = mimetypes.guess_type(src)[0]
        if not (mimetype and mimetype.startswith("image/")):
            # Unknown type, or not an image
            continue
        try:
            image = load_attachment(src[len(prefix):])
        except IOError:
            # TODO Guess the file does not exist. Put something here?
            continue
        encoded = b64encode(image).decode("ascii")
        # A data URI must not contain whitespace, so no space after the
        # comma and no line breaks in the payload.
        element.attrib["src"] = f"data:{mimetype};base64,{encoded}"

    # TODO maybe replace broken images with something more explanatory?
    return etree.tostring(doc).decode("utf-8")
9 changes: 6 additions & 3 deletions backend/backend/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def search(cls, logbook=None, followups=False,
attribute_filter=None, content_filter=None,
title_filter=None, author_filter=None,
attachment_filter=None, metadata_filter=None,
sort_by_timestamp=True):
sort_by_timestamp=True, reverse_order=False):

# Note: this is all pretty messy. The reason we're building
# the query as a raw string is that peewee does not (currently)
Expand Down Expand Up @@ -739,7 +739,8 @@ def search(cls, logbook=None, followups=False,

# Check if we're searching, in that case we want to show all entries.
if followups or any([title_filter, content_filter, author_filter,
metadata_filter, attribute_filter, attachment_filter]):
metadata_filter, attribute_filter,
attachment_filter]):
query += " GROUP BY thread"
else:
# We're not searching. In this case we'll only show
Expand All @@ -748,7 +749,9 @@ def search(cls, logbook=None, followups=False,
# sort newest first, taking into account the last edit if any
# TODO: does this make sense? Should we only consider creation date?
order_by = sort_by_timestamp and "timestamp" or "entry.created_at"
query += " ORDER BY entry.priority DESC, {} DESC".format(order_by)
ordering = "DESC" if reverse_order else "ASC"
query += " ORDER BY entry.priority DESC, {} {}".format(order_by,
ordering)
if n:
query += " LIMIT {}".format(n)
if offset:
Expand Down
84 changes: 64 additions & 20 deletions backend/backend/export.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,35 @@
from datetime import datetime
from tempfile import NamedTemporaryFile

from flask import current_app
from slugify import slugify

try:
import pdfkit
except ImportError:
pdfkit = None

from .attachments import embed_attachments


def export_entries_as_pdf(logbook, entries):
def export_entries_as_pdf(logbook, entries, n, offset):

"""
Super basic "proof-of-concept" PDF export
No proper formatting, and does not embed images.
Export entries as a PDF. Simply uses the HTML export function below,
and converts the HTML into PDF using pdfkit.
Note that pdfkit relies on the external library "wkhtmltopdf".
TODO: pdfkit seems a bit limited, look for a more flexible alternative.
"reportlab" looks pretty good (https://bitbucket.org/rptlab/reportlab)
"""

if pdfkit is None:
return None

entries_html = [
"""
<p><b>Created at:</b> {created_at}</p>
<p><b>Title:</b> {title}</p>
<p><b>Authors:</b> {authors}</p>
<p>{content}</p>
""".format(title=entry.title or "(No title)",
authors=", ".join(a["name"] for a in entry.authors),
created_at=entry.created_at,
content=entry.content or "---")
for entry in entries
]
raise ValueError("No pdfkit/wkhtmltopdf available.")

html, n_entries = export_entries_as_html(logbook, entries, n, offset)

with NamedTemporaryFile(prefix=logbook.name,
suffix=".pdf",
delete=False) as f:
delete=True) as f:
options = {
"load-error-handling": "ignore",
"load-media-error-handling": "ignore",
Expand All @@ -45,10 +40,59 @@ def export_entries_as_pdf(logbook, entries):
'encoding': "UTF-8",
}
try:
pdfkit.from_string("<hr>".join(entries_html), f.name, options)
pdfkit.from_string(html, f.name, options)
except OSError:
# Apparently there's some issue with wkhtmltopdf which produces
# errors, but it works anyway. See
# https://github.com/wkhtmltopdf/wkhtmltopdf/issues/2051
pass
return f.name

f.seek(0)
return f.read(), n_entries


def export_entries_as_html(logbook, entries, n, offset):
    """
    Render the given logbook entries into a single HTML string, using the
    "entry_export.html.jinja2" template. The attachment embedding helper
    is handed to the template as "embed", so that it can produce a
    standalone document.

    Returns a tuple (html, number of entries rendered).
    Raises ValueError if there are no entries to export.
    """
    entry_list = list(entries)
    n_found = len(entry_list)
    if n_found == 0:
        raise ValueError("No entries!")

    jinja_env = current_app.jinja_env
    template = jinja_env.get_or_select_template("entry_export.html.jinja2")

    current_app.logger.info("Rendering HTML for logbook %s; n=%d, offset=%d",
                            logbook.id, n, offset)

    # Note that the template gets the actual number of entries, which
    # may differ from the requested "n".
    context = dict(logbook=logbook, entries=entry_list,
                   n=n_found, offset=offset,
                   embed=embed_attachments,
                   export_time=datetime.now())
    rendered = template.render(**context)

    return rendered, n_found


def export_entries(logbook, entries, n_entries, offset, filetype):
    """
    Export the given entries as one file of the requested type.

    `filetype` must be "html" or "pdf".

    Returns a tuple (data, real_n_entries, mimetype, filename), where
    `data` is the file content as bytes and `real_n_entries` is the
    number of entries actually exported.

    Raises ValueError if the file type is not supported. The underlying
    export functions may also raise ValueError, e.g. if there are no
    entries to export.
    """
    if filetype == "html":
        html, real_n_entries = export_entries_as_html(
            logbook, entries, n=n_entries, offset=offset)
        data = html.encode("utf-8")
        mimetype = "text/html"
    elif filetype == "pdf":
        data, real_n_entries = export_entries_as_pdf(
            logbook, entries, n=n_entries, offset=offset)
        mimetype = "application/pdf"
    else:
        # Without this, an unknown type would run into unbound local
        # variables below, instead of the ValueError callers handle.
        raise ValueError(
            "Unsupported export file type: {!r}".format(filetype))

    # The file name records which range of entries the export contains.
    filename = "{}_{}_{}-{}.{}".format(
        slugify(logbook.name),
        datetime.now().strftime("%Y%m%dT%H%M%S"),
        offset,
        offset + real_n_entries,
        filetype
    )
    return data, real_n_entries, mimetype, filename
2 changes: 2 additions & 0 deletions backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

BASEURL = os.getenv('ELOGY_URL', 'https://elogy.maxiv.lu.se')

ROOT_PATH = os.getenv('ELOGY_ROOT', '')

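# NOTE: any non-empty ELOGY_DEBUG value (including "0" or "False") is truthy here; set it to an empty string to turn debug mode off.
DEBUG = bool(os.getenv('ELOGY_DEBUG', 1)) # !!!Change this to False for production use!!!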

# The name of the database file
Expand Down
2 changes: 2 additions & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ peewee==2.10.2
Pillow==5.0.0
pyldap==2.4.45
python-dateutil==2.6.1
python-slugify==2.0.1
pytz==2017.3
six==1.11.0
uWSGI==2.0.15
webargs==1.8.1
Werkzeug==0.14.1
wtf-peewee==0.2.6
WTForms==2.1
pdfkit==0.6.1
4 changes: 3 additions & 1 deletion backend/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
'flask-restful',
'pyldap', # should be optional, depends on libpdap and libsasl!
'flask-admin', # maybe also optional?
'wtf-peewee'
'wtf-peewee',
'python-slugify', # optional, only used for file export
'pdfkit' # optional, for export and depends on wkhtmltopdf
],
extras_require={
"dev": [
Expand Down
Loading