Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix extension (close #40) #41

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions redditdownload/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
:param reddit_sort: type of sorting post
:returns: list -- list of post url
"""
# assume no advanced sorting.
is_advanced_sort = False
url_base = 'https://www.reddit.com'

if multireddit:
if '/m/' not in subreddit:
warning = ('That doesn\'t look like a multireddit. Are you sure'
'you need that multireddit flag?')
print warning
sys.exit(1)
url = 'http://www.reddit.com/user/%s.json' % subreddit
url = '{}/user/%s.json'.format(url_base, subreddit)
if not multireddit:
if '/m/' in subreddit:
warning = ('It looks like you are trying to fetch a multireddit. \n'
Expand All @@ -32,15 +35,15 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
sys.exit(1)
# no sorting needed
if reddit_sort is None:
url = 'http://www.reddit.com/r/{}.json'.format(subreddit)
url = '{}/r/{}.json'.format(url_base, subreddit)
# if sort is top or controversial, may include advanced sort (ie week, all etc)
elif 'top' in reddit_sort:
url = 'http://www.reddit.com/r/{}/{}.json'.format(subreddit, 'top')
url = '{}/r/{}/{}.json'.format(url_base, subreddit, 'top')
elif 'controversial' in reddit_sort:
url = 'http://www.reddit.com/r/{}/{}.json'.format(subreddit, 'controversial')
url = '{}/r/{}/{}.json'.format(url_base, subreddit, 'controversial')
# use default
else:
url = 'http://www.reddit.com/r/{}/{}.json'.format(subreddit, reddit_sort)
url = '{}/r/{}/{}.json'.format(url_base, subreddit, reddit_sort)

# Get items after item with 'id' of previd.

Expand Down
27 changes: 27 additions & 0 deletions redditdownload/redditdownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import StringIO
import sys
import logging
import imghdr
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
from argparse import ArgumentParser
Expand Down Expand Up @@ -160,6 +161,11 @@ def download_from_url(url, dest_file):
# Don't download files multiple times!
if pathexists(dest_file):
raise FileExistsException('URL [%s] already downloaded.' % url)
elif ('.jpg' in dest_file or '.jpeg' in dest_file) and 'imgur.com' in url:
dest_file_ext = '.jpg' if '.jpg' in dest_file else '.jpeg'
if pathexists(dest_file.replace(dest_file_ext, '.png')):
error_txt = 'URL [{}] may already downloaded with [.png] extensions.'
raise FileExistsException(error_txt.format(url))

response = request(url)
info = response.info()
Expand Down Expand Up @@ -190,6 +196,25 @@ def download_from_url(url, dest_file):
filehandle.close()


def fix_image_ext(filename):
    """Fix an image file's extension using python imghdr.

    Detect the actual image type from the file's content with
    ``imghdr.what`` and, when it disagrees with the current extension,
    rename the file in place to carry the detected extension.  The file
    is left untouched when the type cannot be detected, when the
    extension already matches (treating ``.jpg``/``.jpeg`` as
    equivalent), or when the corrected filename already exists on disk.

    :param filename: path of the (already downloaded) image file
    :returns: None -- renames the file on disk as a side effect
    """
    logger = logging.getLogger(__name__)
    new_filename = None
    basename, file_ext = pathsplitext(filename)
    ihdr_ext = imghdr.what(filename)
    # Check ihdr_ext for None first (imghdr returns None when the type is
    # unknown), and compare case-insensitively so e.g. 'photo.JPG' with
    # jpeg content is recognised as already correct and not renamed.
    if ihdr_ext is not None and '.{}'.format(ihdr_ext) != file_ext.lower():
        if ihdr_ext == 'jpeg' and file_ext.lower() in ('.jpeg', '.jpg'):
            # imghdr reports 'jpeg' for both spellings;
            # don't do anything for jpg/jpeg file
            pass
        else:
            new_filename = '{}.{}'.format(basename, ihdr_ext)
    if new_filename is not None:
        if pathexists(new_filename):
            logger.debug('Can\'t fix file Extension, file already exist.')
        else:
            logger.info('Fix extension from [{}] to [{}]'.format(file_ext, ihdr_ext))
            os.rename(filename, new_filename)

def process_imgur_url(url):
"""
Given an imgur URL, determine if it's a direct link to an image or an
Expand Down Expand Up @@ -507,6 +532,8 @@ def main():
print ' Sucessfully downloaded URL [%s] as [%s].' % (URL, FILENAME)
DOWNLOADED += 1
FILECOUNT += 1
if 'imgur.com' in URL:
fix_image_ext(FILEPATH)

except Exception,e:
print ' %s' % str(e)
Expand Down
Empty file added requirements.txt
Empty file.