-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Function name fix, using uuid1mc, time format fix, TaskRegistry fix #60
Changes from 41 commits
152c153
039cdbd
2f5df3b
6b8e07f
221dcde
ede59b9
dfdf09a
df77ffc
2d7ff92
95dce28
5cead4e
d6766a7
b5906f9
7a5bcb9
28e656f
83128dd
144d4ac
dc961d6
907c7fe
a1372fb
127aa4c
e3228e4
3ca82e9
b581138
395b862
85b505c
9d17938
e42e750
fd8737c
0cc523e
64da948
eeb5b96
81ccc8d
7a71a7a
f2b4b26
154b23c
e7f46fa
5ceb68c
bb6a350
78bd8d3
993287c
481701a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from datetime import datetime, timedelta | ||
from glob import glob | ||
import os.path | ||
import os | ||
import re | ||
from subprocess import check_call, CalledProcessError | ||
import time | ||
|
@@ -14,9 +15,9 @@ | |
from imgee import app | ||
from imgee.models import db, Thumbnail, StoredFile | ||
from imgee.utils import ( | ||
newid, guess_extension, get_file_type, | ||
newid, guess_extension, get_file_type, is_animated_gif, | ||
path_for, get_s3_folder, get_s3_bucket, | ||
download_frm_s3, get_width_height, ALLOWED_MIMETYPES, | ||
download_from_s3, get_width_height, ALLOWED_MIMETYPES, | ||
exists_in_s3, THUMBNAIL_COMMANDS | ||
) | ||
|
||
|
@@ -29,17 +30,25 @@ def get_resized_image(img, size, is_thumbnail=False): | |
""" | ||
registry = imgee.registry | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a circular import. Can you possibly define the registry before other modules are imported, to avoid the circular import? One way is to move the registry's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, an init_app method on the class. Not in the module. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, the convention in Flask is that if you want to add something to the app, add it under |
||
img_name = img.name | ||
|
||
if img.mimetype == 'image/gif': | ||
# if the gif file is animated, not resizing it for now | ||
# but we will need to resize the gif, keeping animation intact | ||
# https://github.com/hasgeek/imgee/issues/55 | ||
src_path = download_from_s3(img.filename) | ||
if is_animated_gif(src_path): | ||
return img.name | ||
|
||
size_t = parse_size(size) | ||
if (size_t and size_t[0] != img.width and size_t[1] != img.height) or ('thumb_extn' in ALLOWED_MIMETYPES[img.mimetype] and ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] != img.extn): | ||
w_or_h = or_(Thumbnail.width == size_t[0], Thumbnail.height == size_t[1]) | ||
scaled = Thumbnail.query.filter(w_or_h, Thumbnail.stored_file == img).first() | ||
if scaled and exists_in_s3(scaled): | ||
img_name = scaled.name | ||
else: | ||
size = get_fitting_size((img.width, img.height), size_t) | ||
original_size = (img.width, img.height) | ||
size = get_fitting_size(original_size, size_t) | ||
resized_filename = get_resized_filename(img, size) | ||
registry = imgee.registry | ||
|
||
try: | ||
if resized_filename in registry: | ||
# file is still being processed | ||
|
@@ -48,12 +57,10 @@ def get_resized_image(img, size, is_thumbnail=False): | |
else: | ||
registry.add(resized_filename) | ||
img_name = resize_and_save(img, size, is_thumbnail=is_thumbnail) | ||
except Exception as e: | ||
# something broke while processing the file | ||
raise e | ||
finally: | ||
# file has been processed, remove from registry | ||
registry.remove(resized_filename) | ||
if resized_filename in registry: | ||
registry.remove(resized_filename) | ||
return img_name | ||
|
||
|
||
|
@@ -74,7 +81,7 @@ def save_file(fp, profile, title=None): | |
|
||
stored_file = save_img_in_db(name=id_, title=title, local_path=local_path, | ||
profile=profile, mimetype=content_type, orig_extn=extn) | ||
s3resp = save_on_s3(img_name, content_type=content_type) | ||
save_on_s3(img_name, content_type=content_type) | ||
return title, stored_file | ||
|
||
|
||
|
@@ -121,7 +128,8 @@ def save_on_s3(filename, remotename='', content_type='', bucket='', folder=''): | |
headers = { | ||
'Cache-Control': 'max-age=31536000', # 60*60*24*365 | ||
'Content-Type': get_file_type(fp, filename), | ||
'Expires': datetime.now() + timedelta(days=365) | ||
# once cached, it is set to expire after a year | ||
'Expires': datetime.utcnow() + timedelta(days=365) | ||
} | ||
k.set_contents_from_file(fp, policy='public-read', headers=headers) | ||
return filename | ||
|
@@ -146,7 +154,7 @@ def parse_size(size): | |
return tuple(map(int, size)) | ||
|
||
|
||
def get_fitting_size((orig_w, orig_h), size): | ||
def get_fitting_size(original_size, size): | ||
""" | ||
Return the size to fit the image to the box | ||
along the smaller side and preserve aspect ratio. | ||
|
@@ -171,6 +179,8 @@ def get_fitting_size((orig_w, orig_h), size): | |
>>> get_fitting_size((200, 500), (400, 600)) | ||
[240, 600] | ||
""" | ||
orig_w, orig_h = original_size | ||
|
||
if orig_w == 0 or orig_h == 0: | ||
# this is either a cdr file or a zero width file | ||
# just go with target size | ||
|
@@ -216,7 +226,7 @@ def resize_and_save(img, size, is_thumbnail=False): | |
Get the original image from local disk cache, download it from S3 if it misses. | ||
Resize the image and save resized image on S3 and size details in db. | ||
""" | ||
src_path = download_frm_s3(img.name + img.extn) | ||
src_path = download_from_s3(img.filename) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noticed this is for download. Since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are storing the the filename as stored in S3. I'm not sure why this function exists https://github.com/hasgeek/imgee/blob/master/imgee/utils.py#L88 if the file name is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I get it. We're not storing the exact name in s3 in db. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Filed as #69. |
||
|
||
if 'thumb_extn' in ALLOWED_MIMETYPES[img.mimetype]: | ||
format = ALLOWED_MIMETYPES[img.mimetype]['thumb_extn'] | ||
|
@@ -258,9 +268,10 @@ def resize_img(src, dest, size, mimetype, format, is_thumbnail): | |
|
||
def clean_local_cache(expiry=24): | ||
""" | ||
Remove files from local cache which are NOT accessed in the last `expiry` hours. | ||
Remove files from local cache | ||
which are NOT accessed in the last `expiry` hours. | ||
""" | ||
cache_path = app.config.get('UPLOADED_FILES_DEST') | ||
cache_path = os.path.join(app.static_folder, app.config.get('UPLOADED_FILES_DIR')) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll check this out and make changes when I find it. |
||
cache_path = os.path.join(cache_path, '*') | ||
min_atime = time.time() - expiry*60*60 | ||
|
||
|
@@ -275,11 +286,16 @@ def clean_local_cache(expiry=24): | |
def delete(stored_file, commit=True): | ||
""" | ||
Delete all the thumbnails and images associated with a file, from local cache and S3. | ||
Wait for the upload/resize to complete if queued for the same image. | ||
""" | ||
registry = imgee.registry | ||
# remove all the keys related to the given file name | ||
# this is delete all keys matching `imgee:registry:<name>*` | ||
registry.remove_keys_starting_with(stored_file.name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, UUID. It removes all the keys matching There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't it be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, I'm sorry. it removes all keys matching File names are in
The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of deleting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool. I'll do that. |
||
|
||
# remove locally | ||
cache_path = app.config.get('UPLOADED_FILES_DEST') | ||
cached_img_path = os.path.join(cache_path, '%s*' % stored_file.name) | ||
cache_path = os.path.join(app.static_folder, app.config.get('UPLOADED_FILES_DIR')) | ||
os.remove(os.path.join(cache_path, '%s' % stored_file.filename)) | ||
cached_img_path = os.path.join(cache_path, '%s_*' % stored_file.name) | ||
for f in glob(cached_img_path): | ||
os.remove(f) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,85 @@ | ||
import re | ||
import redis | ||
from imgee import app | ||
|
||
|
||
class InvalidRedisQueryException(Exception): | ||
pass | ||
|
||
|
||
class TaskRegistry(object): | ||
def __init__(self, name='default', connection=None): | ||
self.connection = redis.from_url(connection) if connection else None | ||
def __init__(self, name='default', connection=None, app=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usually |
||
self.name = name | ||
self.key = 'imgee:registry:%s' % name | ||
self.connection = connection | ||
self.key_prefix = 'imgee:registry:%s' % name | ||
self.filename_pattern = '^[a-z0-9\_\.]+$' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you mean to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. making it a raw string and not escaping |
||
|
||
def set_connection(self, connection=None): | ||
connection = connection or app.config.get('REDIS_URL') | ||
self.connection = redis.from_url(connection) | ||
if app: | ||
self.init_app(app) | ||
else: | ||
self.app = None | ||
|
||
def add(self, taskid): | ||
self.connection.sadd(self.key, taskid) | ||
def init_app(self, app): | ||
self.app = app | ||
|
||
def remove(self, taskid): | ||
self.connection.srem(self.key, taskid) | ||
if not self.connection: | ||
url = self.app.config.get('REDIS_URL') | ||
self.connection = redis.from_url(url) | ||
self.pipe = self.connection.pipeline() | ||
|
||
def remove_all(self): | ||
for k in self.get_all_keys(): | ||
self.remove(k) | ||
def is_valid_query(self, query): | ||
return bool(re.match(self.filename_pattern, query)) | ||
|
||
def key_for(self, taskid): | ||
return u'{key_prefix}:{taskid}'.format(key_prefix=self.key_prefix, taskid=taskid) | ||
|
||
def __contains__(self, taskid): | ||
return self.connection.sismember(self.key, taskid) | ||
return len(self.connection.keys(self.key_for(taskid))) > 0 | ||
|
||
def keys_starting_with(self, query): | ||
return filter(lambda k: k.startswith(query), self.connection.smembers(self.key)) | ||
def add(self, taskid, expire=60): | ||
self.connection.set(self.key_for(taskid), taskid) | ||
# setting TTL of 60 seconds for the key | ||
# if the file doesn't get processed within 60 seconds, | ||
# it'll be removed from the registry | ||
self.connection.expire(self.key_for(taskid), expire) | ||
|
||
def search(self, query): | ||
return filter(lambda k: str(query) in k, self.connection.smembers(self.key)) | ||
# >> KEYS imgee:registry:default:*query* | ||
if not self.is_valid_query(query): | ||
raise InvalidRedisQueryException(u'Invalid query for searching redis keys: {}'.format(query)) | ||
return self.connection.keys(self.key_for('*{}*'.format(query))) | ||
|
||
def get_all_keys(self): | ||
return list(self.connection.smembers(self.key)) | ||
# >> KEYS imgee:registry:default:* | ||
return self.connection.keys(self.key_for('*')) | ||
|
||
def keys_starting_with(self, query): | ||
# >> KEYS imgee:registry:default:query_* | ||
# chances are that we'll use this function to find the | ||
# thumbnail keys, which look like "name_wNN_hNN", hence the _ | ||
if not self.is_valid_query(query): | ||
raise InvalidRedisQueryException(u'Invalid query for searching redis keys, starting with: {}'.format(query)) | ||
return self.connection.keys(self.key_for('{}_*'.format(query))) | ||
|
||
def remove(self, taskid): | ||
# remove a key with the taskid | ||
self.remove_by_key(self.key_for(taskid)) | ||
|
||
def remove_by_key(self, key): | ||
# remove a redis key directly | ||
self.connection.delete(key) | ||
|
||
def remove_keys(self, keys): | ||
for k in keys: | ||
self.pipe.delete(k) | ||
self.pipe.execute() | ||
|
||
def remove_keys_matching(self, query): | ||
self.remove_keys(self.search(query)) | ||
|
||
def remove_keys_starting_with(self, query): | ||
# query will most likely be stored_file.name, So | ||
# Delete keys for only `<query>` and `<query>_*` | ||
self.remove_keys([self.key_for(query)] + self.keys_starting_with(query)) | ||
|
||
def remove_all(self): | ||
self.remove_keys(self.get_all_keys()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a comment on the order of sections, to make this file more readable. It should be in
install
,before_script
,script
order.