diff --git a/.travis.yml b/.travis.yml index c7a35f9..ffbe460 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,15 @@ language: python matrix: include: - python: 2.7 - - python: 3.4 - - python: 3.5 + dist: bionic - python: 3.6 + dist: bionic - python: 3.7 - dist: xenial + dist: bionic + - python: 3.8 + dist: bionic + - python: 3.9 + dist: bionic before_script: - sh ./travis/init.sh @@ -29,7 +33,7 @@ deploy: tags: true distributions: sdist bdist_wheel repo: jarrekk/imgkit - python: 2.7 + python: 3.7 notifications: email: me@jarrekk.com diff --git a/README.md b/README.md index 8c21fe0..2242b06 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # IMGKit: Python library of HTML to IMG wrapper [![Build Status](https://travis-ci.org/jarrekk/imgkit.svg?branch=master)](https://travis-ci.org/jarrekk/imgkit) -[![Code Coverage](https://codecov.io/github/jarrekk/imgkit/branch/master/graph/badge.svg)](https://codecov.io/github/jarrekk/imgkit/) +[![codecov](https://codecov.io/gh/jarrekk/imgkit/branch/master/graph/badge.svg?token=pNl4TtuAzz)](https://codecov.io/gh/jarrekk/imgkit) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/aa1f67f04ff24bb080b7f8c8a9b7b8b1)](https://www.codacy.com/app/jarrekk/imgkit?utm_source=github.com&utm_medium=referral&utm_content=jarrekk/imgkit&utm_campaign=Badge_Grade) [![PyPI version](https://badge.fury.io/py/imgkit.svg)](https://badge.fury.io/py/imgkit) -``` +``` text _____ __ __ _____ _ __ _ _ |_ _| | \/ | / ____| | |/ / (_) | | | | | \ / | | | __ | ' / _ | |_ @@ -27,21 +27,23 @@ Python 2 and 3 wrapper for wkhtmltoimage utility to convert HTML to IMG using We 2. Install wkhtmltopdf: - * Debian/Ubuntu: + * Debian/Ubuntu: - ``` bash - sudo apt-get install wkhtmltopdf - ``` + ``` bash + sudo apt-get install wkhtmltopdf + ``` + + **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from [wkhtmltopdf](http://wkhtmltopdf.org/) site or you can use this [script](https://github.com/jarrekk/imgkit/blob/master/travis/init.sh). - **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from [wkhtmltopdf](http://wkhtmltopdf.org/) site or you can use this [script](https://github.com/jarrekk/imgkit/blob/master/travis/init.sh). + * MacOSX: - * MacOSX + ``` bash + brew install --cask wkhtmltopdf + ``` - ``` bash - brew install wkhtmltopdf - ``` - - * Windows and other options: check [wkhtmltopdf homepage](http://wkhtmltopdf.org/) for binary installers or [wiki page](https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF). + * Windows and other options: + + Check [wkhtmltopdf homepage](http://wkhtmltopdf.org/) for binary installers or [wiki page](https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF). ## Usage @@ -55,13 +57,6 @@ imgkit.from_file('test.html', 'out.jpg') imgkit.from_string('Hello!', 'out.jpg') ``` -You can pass a list with multiple URLs or files: - -``` python -imgkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.jpg') -imgkit.from_file(['file1.html', 'file2.html'], 'out.jpg') -``` - Also you can pass an opened file: ``` python @@ -167,26 +162,30 @@ imgkit.from_string(body, 'out.png') Each API call takes an optional config paramater. This should be an instance of `imgkit.config()` API call. It takes the config options as initial paramaters. The available options are: -* `wkhtmltoimage` - the location of the `wkhtmltoimage` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). -* `meta_tag_prefix` - the prefix for `imgkit` specific meta tags - by default this is `imgkit-` + * `wkhtmltoimage` - the location of the `wkhtmltoimage` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). + * `xvfb` - the location of the `xvfb-run` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). + * `meta_tag_prefix` - the prefix for `imgkit` specific meta tags - by default this is `imgkit-` -Example - for when `wkhtmltopdf` is not in `$PATH`: +Example - for when `wkhtmltopdf` or `xvfb` is not in `$PATH`: ``` python -config = imgkit.config(wkhtmltoimage='/opt/bin/wkhtmltoimage') +config = imgkit.config(wkhtmltoimage='/opt/bin/wkhtmltoimage', xvfb='/opt/bin/xvfb-run') imgkit.from_string(html_string, output_file, config=config) ``` - ## Troubleshooting * `IOError: 'No wkhtmltopdf executable found'`: + + Make sure that you have wkhtmltoimage in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltoimage* in Windows or *which wkhtmltoimage* on Linux should return actual path to binary. - Make sure that you have wkhtmltoimage in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltoimage* in Windows or *which wkhtmltoimage* on Linux should return actual path to binary. +* `IOError: 'No xvfb executable found'`: + + Make sure that you have xvfb-run in your `$PATH` or set via custom configuration (see preceding section). *where xvfb* in Windows or *which xvfb-run* or *which Xvfb* on Linux should return actual path to binary. * `IOError: 'Command Failed'`: - - This error means that IMGKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltoimage versions this can be cause by segmentation faults) + + This error means that IMGKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltoimage versions this can be cause by segmentation faults) ## Credit diff --git a/imgkit/__init__.py b/imgkit/__init__.py index 53b227c..fa53f64 100644 --- a/imgkit/__init__.py +++ b/imgkit/__init__.py @@ -1,13 +1,11 @@ # -*- coding: utf-8 -*- -""" -Wkhtmltopdf python wrapper to convert html to image using the webkit rendering engine and qt -""" +"Wkhtmltopdf python wrapper to convert html to image using the webkit rendering engine and qt" -__author__ = 'jarrekk' -__contact__ = 'me@jarrekk.com' -__version__ = '1.0.2' -__homepage__ = 'https://github.com/jarrekk/imgkit' -__license__ = 'MIT' +__author__ = "jarrekk" +__contact__ = "me@jarrekk.com" +__version__ = "1.1.0" +__homepage__ = "https://github.com/jarrekk/imgkit" +__license__ = "MIT" from .imgkit import IMGKit from .api import from_url, from_file, from_string, config diff --git a/imgkit/api.py b/imgkit/api.py index 7366c50..6f34b67 100644 --- a/imgkit/api.py +++ b/imgkit/api.py @@ -14,11 +14,10 @@ def from_url(url, Convert URL/URLs to IMG file/files :param url: URL or list of URLs to be saved - :param output_path: path to output PDF file/files. False means file will be returned as string + :param output_path: path to output image file/files. False means file will be returned as string :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page - :param css: style of input :param config: (optional) instance of imgkit.config.Config() :param cover_first: (optional) if True, cover always precedes TOC :return: True when success @@ -44,7 +43,7 @@ def from_file(filename, Convert HTML file/files to IMG file/files :param filename: path of HTML file or list with paths or file-like object - :param output_path: path to output PDF file/files. False means file will be returned as string + :param output_path: path to output image file/files. False means file will be returned as string :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page @@ -95,7 +94,7 @@ def config(**kwargs): Constructs and returns a :class:`Config` with given options :param wkhtmltopdf: path to binary - :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags + :param meta_tag_prefix: the prefix for ``imgkit`` specific meta tags """ return Config(**kwargs) diff --git a/imgkit/config.py b/imgkit/config.py index 00067a3..d705a53 100644 --- a/imgkit/config.py +++ b/imgkit/config.py @@ -1,30 +1,44 @@ # -*- coding: utf-8 -*- import subprocess -import sys +from subprocess import CalledProcessError class Config(object): - def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'): - self.meta_tag_prefix = meta_tag_prefix + def __init__(self, wkhtmltoimage='', xvfb='', meta_tag_prefix='imgkit-'): + """ + Configure wkhtmltoimage, xvfb, meta_tag_prefix. + :param wkhtmltoimage: wkhtmltoimage path + :param xvfb: xvfb path + :param meta_tag_prefix: the prefix for `imgkit` specific meta tags - by default this is `imgkit-` + """ self.wkhtmltoimage = wkhtmltoimage - - self.xvfb = '' + self.xvfb = xvfb + self.meta_tag_prefix = meta_tag_prefix if not self.wkhtmltoimage: - if sys.platform == 'win32': - self.wkhtmltoimage = subprocess.Popen(['where', 'wkhtmltoimage'], - stdout=subprocess.PIPE).communicate()[0].strip() - else: - self.wkhtmltoimage = subprocess.Popen(['which', 'wkhtmltoimage'], - stdout=subprocess.PIPE).communicate()[0].strip() + # get wkhtmltoimage in *nix/windows server + # see https://github.com/jarrekk/imgkit/issues/57 for windows condition + for find_cmd in ('where', 'which'): + try: + self.wkhtmltoimage = subprocess.check_output([find_cmd, 'wkhtmltoimage']).strip() + break + except CalledProcessError: + self.wkhtmltoimage = '' + except OSError: + self.wkhtmltoimage = '' + if not self.xvfb: - if sys.platform == 'win32': - self.xvfb = subprocess.Popen(['where', 'xvfb-run'], - stdout=subprocess.PIPE).communicate()[0].strip() - else: - self.xvfb = subprocess.Popen(['which', 'xvfb-run'], - stdout=subprocess.PIPE).communicate()[0].strip() + # get xvfb in *nix/windows server + # see https://github.com/jarrekk/imgkit/issues/57 for windows condition + for find_cmd in ('where', 'which'): + try: + self.xvfb = subprocess.check_output([find_cmd, 'xvfb-run']).strip() + break + except CalledProcessError: + self.xvfb = '' + except OSError: + self.xvfb = '' try: with open(self.wkhtmltoimage): @@ -34,3 +48,11 @@ def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'): 'If this file exists please check that this process can ' 'read it. Otherwise please install wkhtmltopdf - ' 'http://wkhtmltopdf.org\n'.format(self.wkhtmltoimage)) + if self.xvfb: + try: + with open(self.xvfb): + pass + except IOError: + raise IOError('No xvfb executable found: "{0}"\n' + 'If this file exists please check that this process can ' + 'read it. Otherwise please install xvfb -'.format(self.xvfb)) diff --git a/imgkit/imgkit.py b/imgkit/imgkit.py index 1d16da9..1aefa55 100644 --- a/imgkit/imgkit.py +++ b/imgkit/imgkit.py @@ -143,19 +143,21 @@ def _normalize_options(self, options): if '--' in key: normalized_key = self._normalize_arg(key) else: - normalized_key = '--%s' % self._normalize_arg(key) + normalized_key = '--{}'.format(self._normalize_arg(key)) if isinstance(value, (list, tuple)): for opt_val in value: - yield (normalized_key, opt_val) + yield normalized_key, opt_val else: - yield (normalized_key, str(value) if value else value) + yield normalized_key, str(value) if value else value - def _normalize_arg(self, arg): + @staticmethod + def _normalize_arg(arg): return arg.lower() - def _style_tag(self, stylesheet): - return "" % stylesheet + @staticmethod + def _style_tag(stylesheet): + return "".format(stylesheet) def _prepend_css(self, path): if self.source.isUrl() or isinstance(self.source.source, list): @@ -193,15 +195,14 @@ def _find_options_in_meta(self, content): dict: {config option: value} """ if (isinstance(content, io.IOBase) - or content.__class__.__name__ == 'StreamReaderWriter'): + or content.__class__.__name__ == 'StreamReaderWriter'): content = content.read() found = {} for x in re.findall(']*>', content): - if re.search('name=["\']%s' % self.config.meta_tag_prefix, x): - name = re.findall('name=["\']%s([^"\']*)' % - self.config.meta_tag_prefix, x)[0] + if re.search('name=["\']{}'.format(self.config.meta_tag_prefix), x): + name = re.findall('name=["\']{}([^"\']*)'.format(self.config.meta_tag_prefix), x)[0] found[name] = re.findall('content=["\']([^"\']*)', x)[0] return found @@ -209,8 +210,7 @@ def _find_options_in_meta(self, content): def to_img(self, path=None): args = self.command(path) - result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # If the source is a string then we will pipe it into wkhtmltoimage. # If we want to add custom CSS to file then we read input file to @@ -231,10 +231,10 @@ def to_img(self, path=None): exit_code = result.returncode if 'cannot connect to X server' in stderr: - raise IOError('%s\n' + raise IOError('{}\n' 'You will need to run wkhtmltoimage within a "virtual" X server.\n' 'Go to the link below for more information\n' - 'http://wkhtmltopdf.org' % stderr) + 'http://wkhtmltopdf.org'.format(stderr)) if 'Error' in stderr: raise IOError('wkhtmltoimage reported an error:\n' + stderr) @@ -243,7 +243,8 @@ def to_img(self, path=None): xvfb_error = '' if 'QXcbConnection' in stderr: xvfb_error = 'You need to install xvfb(sudo apt-get install xvfb, yum install xorg-x11-server-Xvfb, etc), then add option: {"xvfb": ""}.' - raise IOError("wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error)) + raise IOError( + "wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error)) # Since wkhtmltoimage sends its output to stderr we will capture it # and properly send to stdout @@ -257,11 +258,11 @@ def to_img(self, path=None): with codecs.open(path, mode='rb') as f: text = f.read(4) if text == '': - raise IOError('Command failed: %s\n' - 'Check whhtmltoimage output without \'quiet\' ' - 'option' % ' '.join(args)) + raise IOError("Command failed: {}\n" + "Check whhtmltoimage output without " + "'quiet' option".format(' '.join(args))) return True except IOError as e: - raise IOError('Command failed: %s\n' - 'Check whhtmltoimage output without \'quiet\' option\n' - '%s ' % (' '.join(args)), e) + raise IOError("Command failed: {0}\n" + "Check whhtmltoimage output without " + "'quiet' option\n{1} ".format(' '.join(args), e)) diff --git a/imgkit/source.py b/imgkit/source.py index 44ca1bf..9b03007 100644 --- a/imgkit/source.py +++ b/imgkit/source.py @@ -8,7 +8,7 @@ def __init__(self, url_or_file, type_): self.source = url_or_file self.type = type_ - if self.type is 'file': + if self.type == 'file': self.checkFiles() def isUrl(self): @@ -26,10 +26,10 @@ def checkFiles(self): if isinstance(self.source, list): for path in self.source: if not os.path.exists(path): - raise IOError('No such file: %s' % path) + raise IOError('No such file: {}'.format(path)) else: if not hasattr(self.source, 'read') and not os.path.exists(self.source): - raise IOError('No such file: %s' % self.source) + raise IOError('No such file: {}'.format(self.source)) def isString(self): return 'string' in self.type diff --git a/realwork.txt b/realwork.txt deleted file mode 100644 index a90b6d3..0000000 --- a/realwork.txt +++ /dev/null @@ -1 +0,0 @@ -Wed May 22 00:00:00 2019 -0400764702 diff --git a/setup.py b/setup.py index 9a1b07c..62aa094 100644 --- a/setup.py +++ b/setup.py @@ -47,11 +47,10 @@ def long_description(): classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Text Processing', 'Topic :: Text Processing :: General', 'Topic :: Text Processing :: Markup', diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index 506263d..0000000 --- a/test/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# author: Kun Jia -# date: 6/30/17 -# email: me@jarrekk.com diff --git a/test/imgkit_test.py b/test/imgkit_test.py index 37815fa..1428a79 100755 --- a/test/imgkit_test.py +++ b/test/imgkit_test.py @@ -13,7 +13,6 @@ class TestIMGKitInitialization(unittest.TestCase): - """Test init""" def test_html_source(self): r = imgkit.IMGKit('