Skip to content

Commit

Permalink
testing new path
Browse files Browse the repository at this point in the history
  • Loading branch information
femalves committed Jul 3, 2024
1 parent b34e809 commit e120c64
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 36 deletions.
17 changes: 11 additions & 6 deletions scan_explorer_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,20 +151,25 @@ def __init__(self, **kwargs):
@property
def image_url(self):
image_api_url = url_for_proxy('proxy.image_proxy', path=self.image_path)
current_app.logger.debug(f'Image url {image_api_url}')
return image_api_url

@property
def image_path(self):
separator = current_app.config.get('IMAGE_API_SLASH_SUB', '%2F')
# separator = current_app.config.get('IMAGE_API_SLASH_SUB', '%2F')
separator = "/" # 'bitmaps-~type-~journal-~volume-~600' -> bitmaps/type/journal/volume/600'
image_path = f'bitmaps{separator}{self.collection.type}{separator}{self.collection.journal}{separator}{self.collection.volume}{separator}600'
image_path = image_path.replace('.', '_')
image_path += f'{separator}{self.name}'
if self.color_type != PageColor.BW:
image_path += '.tif'
return image_path
image_path = image_path.replace('.', '_') # What exactly does this do? ApJ.. to ApJ__ maybe?
image_path += f'{separator}{self.name}' # 'bitmaps-~type-~journal-~volume-~600-~page'
current_app.logger.debug(f'Image path {image_path}')
# if self.color_type != PageColor.BW:
# image_path += '.tif'
return image_path # 'bitmaps-~type-~journal-~volume-~600-~page.tif' -> 'bitmaps/type/journal/volume/600/page'


@property
def thumbnail_url(self):
current_app.logger.debug(f'Thumbnail url {self.image_url}')
return f'{self.image_url}/square/480,480/0/{self.image_color_quality}.jpg'

@property
Expand Down
5 changes: 3 additions & 2 deletions scan_explorer_service/utils/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ class S3Provider:
Class for interacting with a particular S3 provider
"""

def __init__(self, config):
def __init__(self, config, bucket_name):
"""
input:
config:
"""

self.s3 = boto3.resource("s3")
self.bucket = self.s3.Bucket(config.get("AWS_BUCKET_NAME"))
self.bucket = self.s3.Bucket(config.get(bucket_name))


def write_object_s3(self, file_bytes, object_name):
Expand All @@ -37,6 +37,7 @@ def read_object_s3(self, object_name):
s3_file = s3_obj.read()
return s3_file
except (ClientError, ParamValidationError) as e:
logging.exception(e)
raise e


Expand Down
94 changes: 66 additions & 28 deletions scan_explorer_service/views/image_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,12 @@ def get_pages(item, session, page_start, page_end, page_limit):
query = session.query(Page).filter(Page.collection_id == item.id,
Page.volume_running_page_num >= page_start,
Page.volume_running_page_num <= page_end).order_by(Page.volume_running_page_num).limit(page_limit)
current_app.logger.info(f"Got pages {page_start}-{page_end}: {query}")
return query


@stream_with_context
def fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit):
def fetch_images(session, item, page_start, page_end, page_limit, memory_limit):
n_pages = 0
memory_sum = 0
query = get_pages(item, session, page_start, page_end, page_limit)
Expand All @@ -92,38 +93,71 @@ def fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi,

n_pages += 1

current_app.logger.info(f"Generating image for page: {n_pages}")
current_app.logger.info(f"Getting image for page: {n_pages}")
current_app.logger.info(f'Id: {page.id}, Volume_page: {page.volume_running_page_num}, memory: {memory_sum}')
if n_pages > page_limit:
break
if memory_sum > memory_limit:
current_app.logger.error(f"Memory limit reached: {memory_sum} > {memory_limit}")
break
size = 'full'
if dpi != 600:
size = str(int(page.width*scaling))+ ","
image_url = page.image_url + "/full/" + size + f"/0/{page.image_color_quality}.tif"
current_app.logger.info(f"image url: {page.image_url}")
path = urlparse.urlparse(image_url).path
current_app.logger.info(f"path: {path}")
remove = urlparse.urlparse(url_for_proxy('proxy.image_proxy', path='')).path
current_app.logger.info(f"remove: {remove}")
path = path.replace(remove, '')
current_app.logger.info(f"modified path: {path}")
im_data = image_proxy(path).get_data()
current_app.logger.info(f"Getting image data...: {im_data}")


object_name = page.image_path
current_app.logger.info(f"Image path: {object_name}")
im_data = fetch_image(object_name)
current_app.logger.info(f"File content: {im_data}")
yield im_data


# @stream_with_context
# def fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit):
# n_pages = 0
# memory_sum = 0
# query = get_pages(item, session, page_start, page_end, page_limit)

# for page in query.all():

# n_pages += 1

# current_app.logger.info(f"Generating image for page: {n_pages}")
# current_app.logger.info(f'Id: {page.id}, Volume_page: {page.volume_running_page_num}, memory: {memory_sum}')
# if n_pages > page_limit:
# break
# if memory_sum > memory_limit:
# current_app.logger.error(f"Memory limit reached: {memory_sum} > {memory_limit}")
# break
# size = 'full'
# if dpi != 600:
# size = str(int(page.width*scaling))+ ","
# image_url = page.image_url + "/full/" + size + f"/0/{page.image_color_quality}.tif"
# current_app.logger.info(f"image url: {page.image_url}") # https://dev.adsabs.harvard.edu:443/v1/scan/image/iiif/2/bitmaps-~seri-~ApJ__-~0333-~600-~0000001%2C014
# path = urlparse.urlparse(image_url).path
# current_app.logger.info(f"path: {path}") # /v1/scan/image/iiif/2/bitmaps-~seri-~ApJ__-~0333-~600-~0000001%2C014/full/full/0/bitonal.tif
# remove = urlparse.urlparse(url_for_proxy('proxy.image_proxy', path='')).path
# current_app.logger.info(f"remove: {remove}") # /v1/scan/image/iiif/2/
# path = path.replace(remove, '')
# current_app.logger.info(f"modified path: {path}") # bitmaps-~seri-~ApJ__-~0333-~600-~0000001%2C014/full/full/0/bitonal.tif
# im_data = image_proxy(path).get_data()
# current_app.logger.info(f"Getting image data...: {im_data}")
# yield im_data

def fetch_pdf(object_name):

full_path = f'pdfs/{object_name}'
file_content = S3Provider(current_app.config).read_object_s3(full_path)
current_app.logger.info(f"Successfully fetched PDF from S3 bucket: {object_name}")
file_content = S3Provider(current_app.config, 'AWS_BUCKET_NAME').read_object_s3(full_path)
current_app.logger.info(f"Successfully fetched file from S3 bucket: {object_name}")
response = Response(file_content, mimetype='application/pdf')
response.headers['Content-Disposition'] = f'attachment; filename="{object_name}"'

return response


def fetch_image(object_name):
file_content = S3Provider(current_app.config, 'AWS_BUCKET_NAME').read_object_s3(object_name)
current_app.logger.info(f"Successfully fetched image from S3 bucket: {object_name}")
return file_content


@advertise(scopes=['api'], rate_limit=[5000, 3600*24])
@bp_proxy.route('/pdf', methods=['GET'])
def pdf_save():
Expand All @@ -146,17 +180,21 @@ def pdf_save():
item = get_item(session, id)
current_app.logger.info(f"Item retrieved successfully: {item.id}")

if isinstance(item, Article):
current_app.logger.info(f"Item is an article: {item.id}")
object_name = f'{item.id}.pdf'.lower()
try:
response = fetch_pdf(object_name)
except Exception as e:
current_app.logger.info(f"Failed to get PDF using fallback method for {object_name}: {str(e)}")
response = Response(img2pdf.convert([im for im in fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit)]), mimetype='application/pdf')
else:
current_app.logger.info(f"Attempting to fetch PDF using cantaloupe: {item.id}")
response = Response(img2pdf.convert([im for im in fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit)]), mimetype='application/pdf')
# returns a list of images
response = Response(img2pdf.convert([im for im in fetch_images(session, item, page_start, page_end, page_limit, memory_limit)]), mimetype='application/pdf')

current_app.logger.info(response)
# if isinstance(item, Article):
# current_app.logger.info(f"Item is an article: {item.id}")
# object_name = f'{item.id}.pdf'.lower()
# try:
# response = fetch_pdf(object_name)
# except Exception as e:
# current_app.logger.info(f"Failed to get PDF using fallback method for {object_name}: {str(e)}")
# response = Response(img2pdf.convert([im for im in fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit)]), mimetype='application/pdf')
# else:
# current_app.logger.info(f"Attempting to fetch PDF using cantaloupe: {item.id}")
# response = Response(img2pdf.convert([im for im in fetch_images(session, item, page_start, page_end, page_limit, scaling, dpi, memory_limit)]), mimetype='application/pdf')


return response
Expand Down

0 comments on commit e120c64

Please sign in to comment.