Skip to content

Commit

Permalink
SFR-2349: Increasing CSV Field Size Limit (#456)
Browse files: browse the repository at this point in the history
  • Loading branch information
kylevillegas93 authored Nov 22, 2024
1 parent 0ac0446 commit 4e5744c
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion processes/ingest/hathi_trust.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

class HathiTrustProcess(CoreProcess):
HATHI_RIGHTS_SKIPS = ['ic', 'icus', 'ic-world', 'und']
FIELD_SIZE_LIMIT = 131072 * 2 # 131072 is the default size limit

def __init__(self, *args):
super(HathiTrustProcess, self).__init__(*args[:4], batchSize=1000)
Expand Down Expand Up @@ -106,6 +107,8 @@ def importFromHathiFile(self, hathi_url, start_date_time=None):
self.readHathiFile(hathi_tsv, start_date_time)

def readHathiFile(self, hathi_tsv, start_date_time=None):
csv.field_size_limit(self.FIELD_SIZE_LIMIT)

for number_of_books_ingested, book in enumerate(hathi_tsv):
if self.ingest_limit and number_of_books_ingested > self.ingest_limit:
break
Expand All @@ -114,7 +117,10 @@ def readHathiFile(self, hathi_tsv, start_date_time=None):
book_date_updated = (len(book) > 14 and book[14]) or None

if book_date_updated:
hathi_date_modified = datetime.strptime(book_date_updated, '%Y-%m-%d %H:%M:%S').replace(tzinfo=None)
try:
hathi_date_modified = datetime.strptime(book_date_updated, '%Y-%m-%d %H:%M:%S').replace(tzinfo=None)
except Exception:
hathi_date_modified = None

if book_right and book_right not in self.HATHI_RIGHTS_SKIPS:
if not start_date_time or hathi_date_modified >= start_date_time:
Expand Down

0 comments on commit 4e5744c

Please sign in to comment.