Backing up your Picplz account
==============================

You need to have Python installed (you already do on a recent OS X, and most likely on a recent Linux). The script also needs BeautifulSoup; if it isn't installed, the script prints instructions for getting it.

Put the code into the folder where you want the backup stored, then run the following (substituting [username] with your Picplz username or email):

    python picplzexport.py [username]

You will be prompted for your password, so it doesn't end up lying around in .bash_history.

The run might take a while. When it's done you'll have:

- the full-size images, downloaded as picplz_[id].jpg
- an HTML page (picplz_[username]_backup.html) that lists them
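
If you want a quick sanity check once the run has finished, a tiny helper like the sketch below counts the downloaded images and confirms the HTML page exists. It is not part of picplzexport.py; it only assumes the output filenames listed above and that a simple username maps straight onto the HTML filename (the exporter strips the email domain and lowercases the name).

    # check_backup.py -- hypothetical helper, not shipped with the exporter
    import glob, os, sys

    username = sys.argv[1]                # the same [username] you passed to picplzexport.py
    images = glob.glob("picplz_*.jpg")    # every photo downloaded into this folder
    html = "picplz_%s_backup.html" % username.split("@")[0].lower()
    print("%d image(s) found" % len(images))
    print("%s %s" % (html, "exists" if os.path.exists(html) else "is missing"))

If your username contains punctuation, check build_html() in picplzexport.py for the exact filename it writes.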

picplzexport.py
===============

import sys, os, re, urllib, urllib2, json, time, getpass

# Prefer BeautifulSoup 4; fall back to BeautifulSoup 3 if that is what's installed.
# get_attr() hides the difference between the two versions' attribute APIs.
try:
    from bs4 import BeautifulSoup
    def get_attr(tag, attr):
        return tag.attrs[attr]
except ImportError:
    try:
        from BeautifulSoup import BeautifulSoup
        def get_attr(tag, attr):
            for t in tag.attrs:
                if t[0] == attr:
                    return t[1]
            return None
    except ImportError:
        print """You don't have BeautifulSoup installed.
You can do that with one of the following commands:
easy_install beautifulsoup4
pip install beautifulsoup4
apt-get install python-beautifulsoup4
You can also install BeautifulSoup 3.
More info at http://www.crummy.com/software/BeautifulSoup/"""
        sys.exit()

root = "http://picplz.com"
referer = None
baseheaders = {
    "Accept": "text/html",
    "User-Agent": "Mozilla/5.0 (PicPlz Downloader)"
}
photos = []

# Install a cookie-aware opener so the session and CSRF cookies survive across requests.
cookieprocessor = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookieprocessor)
urllib2.install_opener(opener)

def make_request(url, data=None, headers=None):
    # GET (or POST, when data is given) a path relative to root,
    # sending the previously visited URL as the Referer.
    h = {}
    h.update(baseheaders)
    h.update(headers or {})
    req = urllib2.Request("%s%s" % (root, url), urllib.urlencode(data) if data else None, h)
    global referer
    if referer:
        req.add_unredirected_header("Referer", referer)
    res = urllib2.urlopen(req)
    referer = res.geturl()
    return res

def get_photos(username, password):
    # Log in, walk the photo feed page by page, then download everything.
    print "Getting CSRF token..."
    res = make_request("/login/?next=/yourphotos/")
    csrf = ""
    for c in res.headers.getheaders("set-cookie"):
        if c.find("csrftoken=") == 0:
            csrf = c.split(";")[0].split("=")[1]
    print "Logging in & fetching first photos..."
    res = make_request("/login/?next=/yourphotos/", {"email": username, "password": password, "csrfmiddlewaretoken": csrf})
    html = res.read()
    last_id = html.split("\"last_id\":")[1].split("}")[0]
    extract_photos(html)
    if last_id:
        get_more_photos(last_id)
    download_photos()

def get_more_photos(last_id):
    # Page through the feed via the AJAX endpoint until has_next is false.
    print "Fetching more photos (%s)" % last_id
    data = {
        "_": int(time.time()),
        "last_id": last_id,
        "predicate": "yourphotos",
        "view_type": ""
    }
    res = make_request("/api/v1/picfeed_get?%s" % urllib.urlencode(data), None, {
        "Accept": "application/json, */*",
        "X-Requested-With": "XMLHttpRequest",
    })
    obj = json.loads(res.read())
    if obj.get("value"):
        v = obj.get("value")
        extract_photos(v.get("html"))
        if v.get("has_next"):
            get_more_photos(v.get("last_id"))

def extract_photos(html):
    # Pull id, download URL, caption and target filename out of a feed HTML fragment.
    global photos
    soup = BeautifulSoup(html)
    for d in soup("div", attrs={"class": "pic line"}):
        url = get_attr(d.find("a", attrs={"class": "download"}), "href")
        p = {
            "title": "",
            "id": url.split("/")[-3],
            "url": url,
            "filename": photo_name(url)
        }
        title = d.find("div", attrs={"class": "caption line"})
        if title:
            p["title"] = title.text
        photos.append(p)

def photo_name(url):
    return "picplz_%s.jpg" % url.split("/")[-3]

def download_photos():
    # Download every extracted photo that isn't already on disk.
    global photos
    print "%d photos extracted" % len(photos)
    e = 0
    n = 0
    for p in photos:
        filename = p["filename"]
        if not os.path.exists(filename):
            download_file(p["url"], p["filename"])
            n += 1
        else:
            e += 1
    print "%d photo(s) downloaded, %d already existed." % (n, e)

def download_file(url, filename):
    # Stream a single file to disk, printing a simple in-place progress indicator.
    u = urllib2.urlopen(url)
    f = open(filename, 'wb')
    meta = u.info()
    file_size = int(meta.getheaders("Content-Length")[0])
    print "Downloading: %s Bytes: %s" % (filename, file_size)
    file_size_dl = 0
    block_sz = 8192
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        file_size_dl += len(buffer)
        f.write(buffer)
        status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
        status = status + chr(8) * (len(status) + 1)
        print status,
    f.close()

def build_html(username):
    # Write a simple HTML page that lists all downloaded photos in order.
    global photos
    imagelist = []
    for p in photos:
        imagelist.append('<li id="i%(id)s"><h2>%(title)s</h2><img src="%(filename)s" /></li>' % p)
    h = """<!doctype html>
<html>
	<head>
		<meta charset="utf-8" />
		<title>picplz backup for %(username)s</title>
	</head>
	<body>
		<h1>picplz backup for %(username)s</h1>
		<ol>
			%(images)s
		</ol>
	</body>
</html>""" % {"username": username, "images": "\n\t\t\t".join(imagelist)}
    # Slugify the username (drop the email domain, then anything non-alphanumeric) for the filename.
    username = re.sub('[-\s]+', '-', re.sub('[^\w\s-]', '', username.split("@")[0]).strip().lower())
    filename = "picplz_%s_backup.html" % username
    f = open(filename, 'w')
    f.write(h)
    f.close()
    print "HTML built:", filename

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print "Usage: python picplzexport.py [username]"
    else:
        username = sys.argv[1]
        password = getpass.getpass("Password: ")
        get_photos(username, password)
        build_html(username)