diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..ab02425 --- /dev/null +++ b/.flake8 @@ -0,0 +1,90 @@ +# vim: set syntax=dosini: +[flake8] +exclude = .*,__pycache__ +max-line-length = 120 + +# B001 Do not use bare `except:` +# C408 Unnecessary dict call - rewrite as a literal +# D,DAR: this project has incomplete documentation +# E203 whitespace before ':' +# E221 multiple spaces before operator +# E303 too many blank lines +# E722 do not use bare 'except' +# E741 ambiguous variable name +# E800 Found commented out code +# N801 class name 'open_outfile' should use CapWords convention +# N806 variable in function should be lowercase +# P101 format string does contain unindexed parameters +# S101 Use of assert detected +# S324 Use of weak MD5 hash for security. Consider usedforsecurity=False +# S406 Using escape to parse untrusted XML data is known to be vulnerable to XML attacks +# S410 Using lxml to parse untrusted XML data is known to be vulnerable to XML attacks +# WPS100 Found wrong module name +# WPS110 Found wrong variable name +# WPS111 Found too short name +# WPS113 Found same alias import +# WPS114 Found underscored number name pattern +# WPS115 Found upper-case constant in a class +# WPS120 Found regular name with trailing underscore +# WPS2XX: Complexity +# WPS300 Found local folder import +# WPS301 Found dotted raw import: http.client +# WPS305 Found `f` string +# WPS306 Found class without a base class +# WPS309 Found reversed compare order +# WPS316 Found context manager with too many assignments +# WPS317 Found incorrect multi-line parameters +# WPS318 Found extra indentation +# WPS319 Found bracket in wrong position +# WPS322 Found incorrect multi-line string +# WPS323 Found `%` string formatting +# WPS326 Found implicit string concatenation +# WPS329 Found useless `except` case +# WPS330 Found unnecessary operator +# WPS336 Found explicit string concatenation +# WPS337 Found multiline conditions +# WPS347 Found vague import that 
may cause confusion +# WPS360 Found an unnecessary use of a raw string +# WPS361 Found an inconsistently structured comprehension +# WPS414 Found incorrect unpacking target +# WPS420 Found wrong keyword +# WPS421 Found wrong function call +# WPS429 Found multiple assign targets +# WPS430 Found nested function +# WPS431 Found nested class +# WPS432 Found magic number +# WPS433 Found nested import +# WPS437 Found protected attribute usage +# WPS440 Found block variables overlap +# WPS440 Found block variables overlap +# WPS441 Found control variable used after block +# WPS442 Found outer scope names shadowing +# WPS457 Found an infinite while loop +# WPS458 Found imports collision: argparse +# WPS460 Found single element destructuring +# WPS462 Wrong multiline string usage +# WPS463 Found a getter without a return value +# WPS473 Found too many empty lines in `def`: 6 > 5 +# WPS501 Found `finally` in `try` block without `except` +# WPS504 Found negated condition +# WPS505 Found nested `try` block +# WPS508 Found incorrect `not` with compare usage +# WPS509 Found incorrectly nested ternary +# WPS510 Found `in` used with a non-set container +# WPS515 Found `open()` used without a context manager +# WPS516 Found `type()` used to compare types +# WPS519 Found implicit `sum()` call +# WPS529 Found implicit `.get()` dict usage +# WPS531 Found simplifiable returning `if` condition in a function +# WPS602 Found using `@staticmethod` +# WPS604 Found incorrect node inside `class` body +# WPS605 Found method without arguments +# WPS608 Found incorrect `super()` call +# WPS609 Found direct magic attribute usage +# WPS613 Found incorrect `super()` call context +# WPS615 Found unpythonic getter or setter +extend-ignore = 
B001,C408,D,DAR,E203,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S406,S410,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS336,WPS337,WPS347,WPS360,WPS361,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615 + +# E131 continuation line unaligned for hanging indent +per-file-ignores = + tumblr_backup/is_reblog.py: E131 diff --git a/.gitignore b/.gitignore index fce19e4..edcb236 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -settings.py +/*.egg-info +/.*/ +/dist/ +*.pyc +__pycache__/ diff --git a/README.md b/README.md index 4d4fe95..026ecaf 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,353 @@ -# tumblr-utils +# tumblr-backup -This is a collection of utilities dealing with Tumblr blogs. +### About this fork -- `tumble.py` creates new posts from RSS or Atom feeds -- `tumblr_backup.py` makes a local backup of posts and images -- `mail_export.py` mails tagged links to a recipient list +This is a fork of bbolli's +[tumblr-utils](https://github.com/bbolli/tumblr-utils), with a focus on +tumblr\_backup.py. It adds Python 3 compatibility, various bug fixes, a few +enhancements to normal operation, support for dashboard-only blogs, and several +other features - see the output of `tumblr-backup --help` for the full list of +options. -These scripts are or have been useful to me over the years. +--- -More documentation can be found in each script's docstring or in -[tumblr_backup.md](https://github.com/bbolli/tumblr-utils/blob/master/tumblr_backup.md). +## 0. Description -The utilities run under Python 2.7. +tumblr-backup is a script that backs up your [Tumblr](http://tumblr.com) blog +locally. 
-### Notice +The backup includes all images, both from inline text and from photo posts. An +index links to monthly pages, which contain all the posts from the respective +month with links to single post pages. Command line options select which posts +to back up and set the output format. The audio and video files can also be +saved. -On 2015-06-04, I made the v2 API the default on the master branch. The former -master branch using the v1 API is still available on Github as `api-v1`, but -will no longer be updated. The one feature that's only available with the old -API is the option to backup password-protected blogs. There's no way to pass -a password in Tumblr's v2 API. +By default, all posts of a blog are backed up in minimally styled HTML5. -### License +You can see an example of its output [on my home page](http://drbeat.li/tumblr). -[GPL3](http://www.gnu.org/licenses/gpl-3.0.txt). + +## 1. Installation + +1. `pip install tumblr-backup` +2. Create an "app" at https://www.tumblr.com/oauth/apps. Follow the instructions + there; most values entered don't matter. +3. `tumblr-backup --set-api-key API_KEY`, where API\_KEY is the OAuth Consumer + Token from the app created in the previous step. +4. Run `tumblr-backup blog-name` as often as you like manually or from a cron + job. + +There are several optional dependencies that enable additional features: + +1. To back up audio and video, install `tumblr-backup[video]`, or you can + manually install either yt-dlp or youtube\_dl. If you need HTTP cookies to + download, use an appropriate browser plugin to extract the cookie(s) into a + file and use option `--cookiefile=file`. See + [issue 132](https://github.com/bbolli/tumblr-utils/issues/132). +2. To enable EXIF tagging, install `tumblr-backup[exif]`, or you can manually + install py3exiv2. +3. To back up notes with the --save-notes option, install + `tumblr-backup[bs4]`, or you can manually install beautifulsoup4 and lxml. +4. 
To use the -F/--filter option to filter the downloaded posts with arbitrary + rules based on their metadata, install `tumblr-backup[jq]`. Alternatively, + you can manually install the [jq](https://github.com/mwilliamson/jq.py) + module. +5. To install tumblr-backup with all optional features available, use + `pip install tumblr-backup[all]`. + + +## 2. Usage + +### Synopsis + + tumblr-backup [options] blog-name ... + +### Options + +``` +positional arguments: + blogs + +options: + -h, --help show this help message and exit + -O OUTDIR, --outdir OUTDIR + set the output directory (default: blog-name) + -D, --dirs save each post in its own folder + -q, --quiet suppress progress messages + -i, --incremental incremental backup mode + -l, --likes save a blog's likes, not its posts + -k, --skip-images do not save images; link to Tumblr instead + --save-video save all video files + --save-video-tumblr save only Tumblr video files + --save-audio save audio files + --save-notes save a list of notes for each post + --copy-notes copy the notes list from a previous archive (inverse: + --no-copy-notes) + --notes-limit COUNT limit requested notes to COUNT, per-post + --cookiefile COOKIEFILE + cookie file for youtube-dl, --save-notes, and svc API + -j, --json save the original JSON source + -b, --blosxom save the posts in blosxom format + -r, --reverse-month reverse the post order in the monthly archives + -R, --reverse-index reverse the index file order + --tag-index also create an archive per tag + -a HOUR, --auto HOUR do a full backup at HOUR hours, otherwise do an + incremental backup (useful for cron jobs) + -n COUNT, --count COUNT + save only COUNT posts + -s SKIP, --skip SKIP skip the first SKIP posts + -p PERIOD, --period PERIOD + limit the backup to PERIOD ('y', 'm', 'd', + YYYY[MM[DD]][Z], or START,END) + -N COUNT, --posts-per-page COUNT + set the number of posts per monthly page, 0 for + unlimited + -Q REQUEST, --request REQUEST + save posts matching the request + 
TYPE:TAG:TAG:…,TYPE:TAG:…,…. TYPE can be text, quote, + link, answer, video, audio, photo, chat or any; TAGs + can be omitted or a colon-separated list. Example: -Q + any:personal,quote,photo:me:self + -t REQUEST, --tags REQUEST + save only posts tagged TAGS (comma-separated values; + case-insensitive) + -T REQUEST, --type REQUEST + save only posts of type TYPE (comma-separated values + from text, quote, link, answer, video, audio, photo, + chat) + -F FILTER, --filter FILTER + save posts matching a jq filter (needs jq module) + --no-reblog don't save reblogged posts + --only-reblog save only reblogged posts + -I FMT, --image-names FMT + image filename format ('o'=original, 'i'=<post-id>, + 'bi'=<blog-name>_<post-id>) + -e KW, --exif KW add EXIF keyword tags to each picture (comma-separated + values; '-' to remove all tags, '' to add no extra + tags) + -S, --no-ssl-verify ignore SSL verification errors + --prev-archives DIRS comma-separated list of directories (one per blog) + containing previous blog archives + --no-post-clobber Do not re-download existing posts + --no-server-timestamps + don't set local timestamps from HTTP headers + --hostdirs Generate host-prefixed directories for media + --user-agent USER_AGENT + User agent string to use with HTTP requests + --skip-dns-check Skip DNS checks for internet access + --threads THREADS number of threads to use for post retrieval + --continue Continue an incomplete first backup + --ignore-diffopt Force backup over an incomplete archive with different + options + --no-get Don't retrieve files not found in --prev-archives + --reuse-json Reuse the API responses saved with --json (implies + --copy-notes) + --internet-archive Fall back to the Internet Archive for Tumblr media 403 + and 404 responses + --media-list Save post media URLs to media.json + --id-file FILE file containing a list of post IDs to save, one per + line + --json-info Just print some info for each blog, don't make a + backup +``` + +### Arguments + +_blog-name_: The name of the blog 
to back up. + +If your blog is under `.tumblr.com`, you can give just the first domain name +part; if your blog is under your own domain, give the whole domain name. You +can give more than one _blog-name_ to back up multiple blogs in one go. + +The default blog name(s) can be changed by copying `settings.py.example` to +`settings.py` and adding the name(s) to the `DEFAULT_BLOGS` list. + +### Environment variables + +`LC_ALL`, `LC_TIME`, `LANG`: These variables, in decreasing order of importance, +determine the locale for month names and the date/time format. + +### Exit code + +The exit code is 0 if at least one post has been backed up, 1 if no post has +been backed up, 2 on invocation errors, 3 if the backup was interrupted, or 4 +on HTTP errors. + + +## 3. Operation + +By default, tumblr-backup backs up all posts in HTML format. + +The generated directory structure looks like this: + + ./ - the current directory + <blog-name>/ - your blog backup + index.html - table of contents with links to the monthly pages + backup.css - the default backup style sheet + custom.css - the user's style sheet (optional) + override.css - the user's style sheet override (optional) + archive/ + <yyyy-mm>.html - the monthly pages + … + posts/ + <post-id>.html - the single post pages + … + media/ + <filename> - image files +