From 091e37ce418b9f97e6362952fb66d4a5652a7a56 Mon Sep 17 00:00:00 2001 From: Jarrekk Date: Wed, 22 May 2019 00:00:00 -0400 Subject: [PATCH] new version --- .travis.yml | 12 ++++++---- README.md | 55 ++++++++++++++++++++++---------------------- imgkit/__init__.py | 14 +++++------- imgkit/api.py | 7 +++--- imgkit/config.py | 56 +++++++++++++++++++++++++++++++-------------- imgkit/imgkit.py | 43 +++++++++++++++++----------------- imgkit/source.py | 6 ++--- realwork.txt | 1 - setup.py | 5 ++-- test/__init__.py | 5 ---- test/imgkit_test.py | 18 +++++---------- travis/init.sh | 10 ++++---- 12 files changed, 120 insertions(+), 112 deletions(-) delete mode 100644 realwork.txt delete mode 100644 test/__init__.py diff --git a/.travis.yml b/.travis.yml index c7a35f9..ffbe460 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,15 @@ language: python matrix: include: - python: 2.7 - - python: 3.4 - - python: 3.5 + dist: bionic - python: 3.6 + dist: bionic - python: 3.7 - dist: xenial + dist: bionic + - python: 3.8 + dist: bionic + - python: 3.9 + dist: bionic before_script: - sh ./travis/init.sh @@ -29,7 +33,7 @@ deploy: tags: true distributions: sdist bdist_wheel repo: jarrekk/imgkit - python: 2.7 + python: 3.7 notifications: email: me@jarrekk.com diff --git a/README.md b/README.md index 8c21fe0..2242b06 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # IMGKit: Python library of HTML to IMG wrapper [![Build Status](https://travis-ci.org/jarrekk/imgkit.svg?branch=master)](https://travis-ci.org/jarrekk/imgkit) -[![Code Coverage](https://codecov.io/github/jarrekk/imgkit/branch/master/graph/badge.svg)](https://codecov.io/github/jarrekk/imgkit/) +[![codecov](https://codecov.io/gh/jarrekk/imgkit/branch/master/graph/badge.svg?token=pNl4TtuAzz)](https://codecov.io/gh/jarrekk/imgkit) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/aa1f67f04ff24bb080b7f8c8a9b7b8b1)](https://www.codacy.com/app/jarrekk/imgkit?utm_source=github.com&utm_medium=referral&utm_content=jarrekk/imgkit&utm_campaign=Badge_Grade) [![PyPI version](https://badge.fury.io/py/imgkit.svg)](https://badge.fury.io/py/imgkit) -``` +``` text _____ __ __ _____ _ __ _ _ |_ _| | \/ | / ____| | |/ / (_) | | | | | \ / | | | __ | ' / _ | |_ @@ -27,21 +27,23 @@ Python 2 and 3 wrapper for wkhtmltoimage utility to convert HTML to IMG using We 2. Install wkhtmltopdf: - * Debian/Ubuntu: + * Debian/Ubuntu: - ``` bash - sudo apt-get install wkhtmltopdf - ``` + ``` bash + sudo apt-get install wkhtmltopdf + ``` + + **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from [wkhtmltopdf](http://wkhtmltopdf.org/) site or you can use this [script](https://github.com/jarrekk/imgkit/blob/master/travis/init.sh). - **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from [wkhtmltopdf](http://wkhtmltopdf.org/) site or you can use this [script](https://github.com/jarrekk/imgkit/blob/master/travis/init.sh). + * MacOSX: - * MacOSX + ``` bash + brew install --cask wkhtmltopdf + ``` - ``` bash - brew install wkhtmltopdf - ``` - - * Windows and other options: check [wkhtmltopdf homepage](http://wkhtmltopdf.org/) for binary installers or [wiki page](https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF). + * Windows and other options: + + Check [wkhtmltopdf homepage](http://wkhtmltopdf.org/) for binary installers or [wiki page](https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF). ## Usage @@ -55,13 +57,6 @@ imgkit.from_file('test.html', 'out.jpg') imgkit.from_string('Hello!', 'out.jpg') ``` -You can pass a list with multiple URLs or files: - -``` python -imgkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.jpg') -imgkit.from_file(['file1.html', 'file2.html'], 'out.jpg') -``` - Also you can pass an opened file: ``` python @@ -167,26 +162,30 @@ imgkit.from_string(body, 'out.png') Each API call takes an optional config paramater. This should be an instance of `imgkit.config()` API call. It takes the config options as initial paramaters. The available options are: -* `wkhtmltoimage` - the location of the `wkhtmltoimage` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). -* `meta_tag_prefix` - the prefix for `imgkit` specific meta tags - by default this is `imgkit-` + * `wkhtmltoimage` - the location of the `wkhtmltoimage` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). + * `xvfb` - the location of the `xvfb-run` binary. By default `imgkit` will attempt to locate this using which` (on UNIX type systems) or where` (on Windows). + * `meta_tag_prefix` - the prefix for `imgkit` specific meta tags - by default this is `imgkit-` -Example - for when `wkhtmltopdf` is not in `$PATH`: +Example - for when `wkhtmltopdf` or `xvfb` is not in `$PATH`: ``` python -config = imgkit.config(wkhtmltoimage='/opt/bin/wkhtmltoimage') +config = imgkit.config(wkhtmltoimage='/opt/bin/wkhtmltoimage', xvfb='/opt/bin/xvfb-run') imgkit.from_string(html_string, output_file, config=config) ``` - ## Troubleshooting * `IOError: 'No wkhtmltopdf executable found'`: + + Make sure that you have wkhtmltoimage in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltoimage* in Windows or *which wkhtmltoimage* on Linux should return actual path to binary. - Make sure that you have wkhtmltoimage in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltoimage* in Windows or *which wkhtmltoimage* on Linux should return actual path to binary. +* `IOError: 'No xvfb executable found'`: + + Make sure that you have xvfb-run in your `$PATH` or set via custom configuration (see preceding section). *where xvfb* in Windows or *which xvfb-run* or *which Xvfb* on Linux should return actual path to binary. * `IOError: 'Command Failed'`: - - This error means that IMGKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltoimage versions this can be cause by segmentation faults) + + This error means that IMGKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltoimage versions this can be cause by segmentation faults) ## Credit diff --git a/imgkit/__init__.py b/imgkit/__init__.py index 53b227c..fa53f64 100644 --- a/imgkit/__init__.py +++ b/imgkit/__init__.py @@ -1,13 +1,11 @@ # -*- coding: utf-8 -*- -""" -Wkhtmltopdf python wrapper to convert html to image using the webkit rendering engine and qt -""" +"Wkhtmltopdf python wrapper to convert html to image using the webkit rendering engine and qt" -__author__ = 'jarrekk' -__contact__ = 'me@jarrekk.com' -__version__ = '1.0.2' -__homepage__ = 'https://github.com/jarrekk/imgkit' -__license__ = 'MIT' +__author__ = "jarrekk" +__contact__ = "me@jarrekk.com" +__version__ = "1.1.0" +__homepage__ = "https://github.com/jarrekk/imgkit" +__license__ = "MIT" from .imgkit import IMGKit from .api import from_url, from_file, from_string, config diff --git a/imgkit/api.py b/imgkit/api.py index 7366c50..6f34b67 100644 --- a/imgkit/api.py +++ b/imgkit/api.py @@ -14,11 +14,10 @@ def from_url(url, Convert URL/URLs to IMG file/files :param url: URL or list of URLs to be saved - :param output_path: path to output PDF file/files. False means file will be returned as string + :param output_path: path to output image file/files. False means file will be returned as string :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page - :param css: style of input :param config: (optional) instance of imgkit.config.Config() :param cover_first: (optional) if True, cover always precedes TOC :return: True when success @@ -44,7 +43,7 @@ def from_file(filename, Convert HTML file/files to IMG file/files :param filename: path of HTML file or list with paths or file-like object - :param output_path: path to output PDF file/files. False means file will be returned as string + :param output_path: path to output image file/files. False means file will be returned as string :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page @@ -95,7 +94,7 @@ def config(**kwargs): Constructs and returns a :class:`Config` with given options :param wkhtmltopdf: path to binary - :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags + :param meta_tag_prefix: the prefix for ``imgkit`` specific meta tags """ return Config(**kwargs) diff --git a/imgkit/config.py b/imgkit/config.py index 00067a3..d705a53 100644 --- a/imgkit/config.py +++ b/imgkit/config.py @@ -1,30 +1,44 @@ # -*- coding: utf-8 -*- import subprocess -import sys +from subprocess import CalledProcessError class Config(object): - def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'): - self.meta_tag_prefix = meta_tag_prefix + def __init__(self, wkhtmltoimage='', xvfb='', meta_tag_prefix='imgkit-'): + """ + Configure wkhtmltoimage, xvfb, meta_tag_prefix. + :param wkhtmltoimage: wkhtmltoimage path + :param xvfb: xvfb path + :param meta_tag_prefix: the prefix for `imgkit` specific meta tags - by default this is `imgkit-` + """ self.wkhtmltoimage = wkhtmltoimage - - self.xvfb = '' + self.xvfb = xvfb + self.meta_tag_prefix = meta_tag_prefix if not self.wkhtmltoimage: - if sys.platform == 'win32': - self.wkhtmltoimage = subprocess.Popen(['where', 'wkhtmltoimage'], - stdout=subprocess.PIPE).communicate()[0].strip() - else: - self.wkhtmltoimage = subprocess.Popen(['which', 'wkhtmltoimage'], - stdout=subprocess.PIPE).communicate()[0].strip() + # get wkhtmltoimage in *nix/windows server + # see https://github.com/jarrekk/imgkit/issues/57 for windows condition + for find_cmd in ('where', 'which'): + try: + self.wkhtmltoimage = subprocess.check_output([find_cmd, 'wkhtmltoimage']).strip() + break + except CalledProcessError: + self.wkhtmltoimage = '' + except OSError: + self.wkhtmltoimage = '' + if not self.xvfb: - if sys.platform == 'win32': - self.xvfb = subprocess.Popen(['where', 'xvfb-run'], - stdout=subprocess.PIPE).communicate()[0].strip() - else: - self.xvfb = subprocess.Popen(['which', 'xvfb-run'], - stdout=subprocess.PIPE).communicate()[0].strip() + # get xvfb in *nix/windows server + # see https://github.com/jarrekk/imgkit/issues/57 for windows condition + for find_cmd in ('where', 'which'): + try: + self.xvfb = subprocess.check_output([find_cmd, 'xvfb-run']).strip() + break + except CalledProcessError: + self.xvfb = '' + except OSError: + self.xvfb = '' try: with open(self.wkhtmltoimage): @@ -34,3 +48,11 @@ def __init__(self, wkhtmltoimage='', meta_tag_prefix='imgkit-'): 'If this file exists please check that this process can ' 'read it. Otherwise please install wkhtmltopdf - ' 'http://wkhtmltopdf.org\n'.format(self.wkhtmltoimage)) + if self.xvfb: + try: + with open(self.xvfb): + pass + except IOError: + raise IOError('No xvfb executable found: "{0}"\n' + 'If this file exists please check that this process can ' + 'read it. Otherwise please install xvfb -'.format(self.xvfb)) diff --git a/imgkit/imgkit.py b/imgkit/imgkit.py index 1d16da9..1aefa55 100644 --- a/imgkit/imgkit.py +++ b/imgkit/imgkit.py @@ -143,19 +143,21 @@ def _normalize_options(self, options): if '--' in key: normalized_key = self._normalize_arg(key) else: - normalized_key = '--%s' % self._normalize_arg(key) + normalized_key = '--{}'.format(self._normalize_arg(key)) if isinstance(value, (list, tuple)): for opt_val in value: - yield (normalized_key, opt_val) + yield normalized_key, opt_val else: - yield (normalized_key, str(value) if value else value) + yield normalized_key, str(value) if value else value - def _normalize_arg(self, arg): + @staticmethod + def _normalize_arg(arg): return arg.lower() - def _style_tag(self, stylesheet): - return "" % stylesheet + @staticmethod + def _style_tag(stylesheet): + return "".format(stylesheet) def _prepend_css(self, path): if self.source.isUrl() or isinstance(self.source.source, list): @@ -193,15 +195,14 @@ def _find_options_in_meta(self, content): dict: {config option: value} """ if (isinstance(content, io.IOBase) - or content.__class__.__name__ == 'StreamReaderWriter'): + or content.__class__.__name__ == 'StreamReaderWriter'): content = content.read() found = {} for x in re.findall(']*>', content): - if re.search('name=["\']%s' % self.config.meta_tag_prefix, x): - name = re.findall('name=["\']%s([^"\']*)' % - self.config.meta_tag_prefix, x)[0] + if re.search('name=["\']{}'.format(self.config.meta_tag_prefix), x): + name = re.findall('name=["\']{}([^"\']*)'.format(self.config.meta_tag_prefix), x)[0] found[name] = re.findall('content=["\']([^"\']*)', x)[0] return found @@ -209,8 +210,7 @@ def _find_options_in_meta(self, content): def to_img(self, path=None): args = self.command(path) - result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # If the source is a string then we will pipe it into wkhtmltoimage. # If we want to add custom CSS to file then we read input file to @@ -231,10 +231,10 @@ def to_img(self, path=None): exit_code = result.returncode if 'cannot connect to X server' in stderr: - raise IOError('%s\n' + raise IOError('{}\n' 'You will need to run wkhtmltoimage within a "virtual" X server.\n' 'Go to the link below for more information\n' - 'http://wkhtmltopdf.org' % stderr) + 'http://wkhtmltopdf.org'.format(stderr)) if 'Error' in stderr: raise IOError('wkhtmltoimage reported an error:\n' + stderr) @@ -243,7 +243,8 @@ def to_img(self, path=None): xvfb_error = '' if 'QXcbConnection' in stderr: xvfb_error = 'You need to install xvfb(sudo apt-get install xvfb, yum install xorg-x11-server-Xvfb, etc), then add option: {"xvfb": ""}.' - raise IOError("wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error)) + raise IOError( + "wkhtmltoimage exited with non-zero code {0}. error:\n{1}\n\n{2}".format(exit_code, stderr, xvfb_error)) # Since wkhtmltoimage sends its output to stderr we will capture it # and properly send to stdout @@ -257,11 +258,11 @@ def to_img(self, path=None): with codecs.open(path, mode='rb') as f: text = f.read(4) if text == '': - raise IOError('Command failed: %s\n' - 'Check whhtmltoimage output without \'quiet\' ' - 'option' % ' '.join(args)) + raise IOError("Command failed: {}\n" + "Check whhtmltoimage output without " + "'quiet' option".format(' '.join(args))) return True except IOError as e: - raise IOError('Command failed: %s\n' - 'Check whhtmltoimage output without \'quiet\' option\n' - '%s ' % (' '.join(args)), e) + raise IOError("Command failed: {0}\n" + "Check whhtmltoimage output without " + "'quiet' option\n{1} ".format(' '.join(args), e)) diff --git a/imgkit/source.py b/imgkit/source.py index 44ca1bf..9b03007 100644 --- a/imgkit/source.py +++ b/imgkit/source.py @@ -8,7 +8,7 @@ def __init__(self, url_or_file, type_): self.source = url_or_file self.type = type_ - if self.type is 'file': + if self.type == 'file': self.checkFiles() def isUrl(self): @@ -26,10 +26,10 @@ def checkFiles(self): if isinstance(self.source, list): for path in self.source: if not os.path.exists(path): - raise IOError('No such file: %s' % path) + raise IOError('No such file: {}'.format(path)) else: if not hasattr(self.source, 'read') and not os.path.exists(self.source): - raise IOError('No such file: %s' % self.source) + raise IOError('No such file: {}'.format(self.source)) def isString(self): return 'string' in self.type diff --git a/realwork.txt b/realwork.txt deleted file mode 100644 index a90b6d3..0000000 --- a/realwork.txt +++ /dev/null @@ -1 +0,0 @@ -Wed May 22 00:00:00 2019 -0400764702 diff --git a/setup.py b/setup.py index 9a1b07c..62aa094 100644 --- a/setup.py +++ b/setup.py @@ -47,11 +47,10 @@ def long_description(): classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Text Processing', 'Topic :: Text Processing :: General', 'Topic :: Text Processing :: Markup', diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index 506263d..0000000 --- a/test/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# author: Kun Jia -# date: 6/30/17 -# email: me@jarrekk.com diff --git a/test/imgkit_test.py b/test/imgkit_test.py index 37815fa..1428a79 100755 --- a/test/imgkit_test.py +++ b/test/imgkit_test.py @@ -13,7 +13,6 @@ class TestIMGKitInitialization(unittest.TestCase): - """Test init""" def test_html_source(self): r = imgkit.IMGKit('

Oh hai

', 'string') @@ -112,7 +111,6 @@ def test_custom_config(self): class TestIMGKitCommandGeneration(unittest.TestCase): - """Test command() method""" def test_command_construction(self): r = imgkit.IMGKit('html', 'string', options={'format': 'jpg', 'toc-l1-font-size': 12}) @@ -291,7 +289,6 @@ def test_filter_empty_and_none_values_in_opts(self): class TestIMGKitGeneration(unittest.TestCase): - """Test to_img() method""" def setUp(self): pass @@ -323,7 +320,6 @@ def test_raise_error_with_invalid_file_path(self): imgkit.IMGKit(paths, 'file') def test_stylesheet_adding_to_the_head(self): - # TODO rewrite this part of pdfkit.py r = imgkit.IMGKit('Hai!', 'string', css='fixtures/example.css') @@ -331,7 +327,7 @@ def test_stylesheet_adding_to_the_head(self): css = f.read() r._prepend_css('fixtures/example.css') - self.assertIn('' % css, r.source.to_s()) + self.assertIn(''.format(css), r.source.to_s()) def test_stylesheet_adding_without_head_tag(self): r = imgkit.IMGKit('Hai!', 'string', @@ -341,10 +337,9 @@ def test_stylesheet_adding_without_head_tag(self): css = f.read() r._prepend_css('fixtures/example.css') - self.assertIn('' % css, r.source.to_s()) + self.assertIn(''.format(css), r.source.to_s()) def test_multiple_stylesheets_adding_to_the_head(self): - # TODO rewrite this part of pdfkit.py css_files = ['fixtures/example.css', 'fixtures/example2.css'] r = imgkit.IMGKit('Hai!', 'string', css=css_files) @@ -355,7 +350,7 @@ def test_multiple_stylesheets_adding_to_the_head(self): css.append(f.read()) r._prepend_css(css_files) - self.assertIn('' % "\n".join(css), r.source.to_s()) + self.assertIn(''.format("\n".join(css)), r.source.to_s()) def test_multiple_stylesheet_adding_without_head_tag(self): css_files = ['fixtures/example.css', 'fixtures/example2.css'] @@ -368,7 +363,7 @@ def test_multiple_stylesheet_adding_without_head_tag(self): css.append(f.read()) r._prepend_css(css_files) - self.assertIn('' % "\n".join(css), r.source.to_s()) + self.assertIn(''.format("\n".join(css)), r.source.to_s()) def test_stylesheet_throw_error_when_url(self): r = imgkit.IMGKit('http://ya.ru', 'url', css='fixtures/example.css') @@ -388,11 +383,11 @@ def test_wkhtmltoimage_error_handling(self): with self.assertRaises(IOError): r.to_img() - def test_pdf_generation_from_file_like(self): + def test_image_generation_from_file(self): with open('fixtures/example.html', 'r') as f: r = imgkit.IMGKit(f, 'file') output = r.to_img() - self.assertEqual(output[:4], b'\xff\xd8\xff\xe0') # TODO img + self.assertEqual(output[:4], b'\xff\xd8\xff\xe0') def test_raise_error_with_wrong_css_path(self): css = 'fixtures/wrongpath.css' @@ -413,7 +408,6 @@ def test_raise_error_if_bad_wkhtmltoimage_option(self): class TestIMGKitAPI(unittest.TestCase): - """Test API""" def test_from_string(self): pic = imgkit.from_string('hello imgkit!', 'out.jpg') diff --git a/travis/init.sh b/travis/init.sh index ea2b3f6..bf8faba 100644 --- a/travis/init.sh +++ b/travis/init.sh @@ -1,9 +1,7 @@ #!/usr/bin/env bash -sudo apt-get install -y openssl build-essential xorg libssl-dev xvfb +sudo apt update +sudo apt install -y xvfb +wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.bionic_amd64.deb +sudo apt install -y ./wkhtmltox_0.12.6-1.bionic_amd64.deb pip install coverage -wget https://downloads.wkhtmltopdf.org/0.12/0.12.4/wkhtmltox-0.12.4_linux-generic-amd64.tar.xz -tar xf wkhtmltox-0.12.4_linux-generic-amd64.tar.xz -cd wkhtmltox -sudo chown root:root bin/wkhtmltopdf -sudo cp -r * /usr/