Skip to content

Commit 13f82bf

Browse files
refactor: enhance logging in LinguisticProcessing to include total document count and newspaper name
1 parent 584af9d commit 13f82bf

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

lib/spacy_linguistic_processing.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -350,6 +350,8 @@ def run(self) -> None:
350 350
collection: str = os.path.basename(infile).split("-")[0]
351 351
year: str = infile.split("-")[-1][:4]
352 352

353+
total_doc_count = len(self.lang_ident_data)
354+
newspaper = outfile.split("/")[-1].split(".")[0]
353 355
start_time = time.time()
354 356
processed_doc_count = 1
355 357
log.info("Processing %s %s %s", infile, collection, year)
@@ -371,10 +373,12 @@ def run(self) -> None:
371 373
end_time = time.time()
372 374

373 375
log.info(
374-
"Processed %d documents with content (total with"
375-
" unprocessable: %s) in %s secs/1k doc",
376+
"Processed %d content items with content (total with"
377+
" unprocessable: %d/%d in %s) in %d secs/1k content items",
376 378
processed_doc_count,
377 379
i,
380+
total_doc_count,
381+
newspaper,
378 382
round((end_time - start_time), 1),
379 383
)
380 384
start_time = end_time

0 commit comments

Comments
 (0)