Clean up logging statements
e-belfer committed Jan 8, 2024
1 parent 69d40d2 commit b472667
Showing 3 changed files with 2 additions and 13 deletions.
6 changes: 1 addition & 5 deletions src/pudl/extract/epacems.py
@@ -173,20 +173,16 @@ def __init__(self, datastore: Datastore):

     def get_data_frame(self, partition: EpaCemsPartition) -> pd.DataFrame:
         """Constructs dataframe from a zipfile for a given (year_quarter) partition."""
-        logger.info(f"Getting dataframe for {partition}")
         archive = self.datastore.get_zipfile_resource(
             "epacems", **partition.get_filters()
         )
-        logger.info(f"Got zipfile for partition {partition}")
         with archive.open(str(partition.get_quarterly_file()), "r") as csv_file:
-            logger.info(f"Opened zipfile for partition {partition}")
             df = self._csv_to_dataframe(
                 csv_file,
                 ignore_cols=API_IGNORE_COLS,
                 rename_dict=API_RENAME_DICT,
                 dtype_dict=API_DTYPE_DICT,
             )
-        logger.info(f"Returning DF for {partition}.")
         return df

     def _csv_to_dataframe(
@@ -233,7 +229,7 @@ def extract(year_quarter: str, ds: Datastore) -> pd.DataFrame:
     year = partition.year
     # We have to assign the reporting year for partitioning purposes
     try:
-        logger.info(f"Processing data frame for {partition}")
+        logger.info(f"Extracting data frame for {year_quarter}")
         df = ds.get_data_frame(partition).assign(year=year)
     # If the requested quarter is not found, return an empty df with expected columns:
     except KeyError:
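
For orientation, here is a minimal sketch of how this extract step might be driven. The Datastore construction and the "2023q1" partition string are assumptions for illustration, not taken from this commit; extract()'s signature comes from the diff above.

# Hypothetical usage sketch -- Datastore() arguments and the year_quarter
# string format are assumptions.
from pudl.extract.epacems import extract
from pudl.workspace.datastore import Datastore

ds = Datastore()  # real callers may configure caching options here
df = extract(year_quarter="2023q1", ds=ds)  # now logs "Extracting data frame for 2023q1"
print(df.head())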
6 changes: 1 addition & 5 deletions src/pudl/workspace/datastore.py
@@ -376,13 +376,11 @@ def get_resources(
logger.info(f"{res} is already optimally cached.")
continue
if self._cache.contains(res):
logger.info(f"Retrieved {res} from cache.")
contents = self._cache.get(res)
logger.info(f"Got resource {res} from cache.")
logger.info(f"Retrieved {res} from cache.")
if not self._cache.is_optimally_cached(res):
logger.info(f"{res} was not optimally cached yet, adding.")
self._cache.add(res, contents)
logger.info("Yielding resource {res} from cache")
yield (res, contents)
elif not cached_only:
logger.info(f"Retrieved {res} from zenodo.")
Expand All @@ -396,7 +394,6 @@ def remove_from_cache(self, res: PudlResourceKey) -> None:

     def get_unique_resource(self, dataset: str, **filters: Any) -> bytes:
         """Returns content of a resource assuming there is exactly one that matches."""
-        logger.info("Getting unique resource.")
         res = self.get_resources(dataset, **filters)
         try:
             _, content = next(res)
@@ -410,7 +407,6 @@ def get_unique_resource(self, dataset: str, **filters: Any) -> bytes:

     def get_zipfile_resource(self, dataset: str, **filters: Any) -> zipfile.ZipFile:
         """Retrieves unique resource and opens it as a ZipFile."""
-        logger.info("Getting zipfile resource.")
         return zipfile.ZipFile(io.BytesIO(self.get_unique_resource(dataset, **filters)))

     def get_zipfile_resources(
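
Two notes on this hunk: the retained message now logs once, after the bytes are actually read from cache, and the deleted "Yielding resource {res} from cache" line was missing its f prefix, so it would have printed {res} literally. A minimal sketch of the cache-first pattern the surviving logging describes; the function and parameter names are invented for illustration, not the real Datastore API.

import logging

logger = logging.getLogger(__name__)


def get_resource(res, cache, fetch_from_zenodo):
    """Sketch only: mirrors the cache-first retrieval above."""
    if cache.contains(res):
        contents = cache.get(res)
        logger.info(f"Retrieved {res} from cache.")  # one log line per cache hit
    else:
        contents = fetch_from_zenodo(res)
        logger.info(f"Retrieved {res} from zenodo.")
    return contents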
3 changes: 0 additions & 3 deletions src/pudl/workspace/resource_cache.py
@@ -203,11 +203,8 @@ def num_layers(self):

     def get(self, resource: PudlResourceKey) -> bytes:
         """Returns content of a given resource."""
-        logger.info(f"Getting resource {resource}")
         for i, cache in enumerate(self._caches):
-            logger.info(f"Getting {i}, {cache}")
             if cache.contains(resource):
-                logger.info(f"Cache contains {resource}. Getting cache.")
                 logger.debug(
                     f"get:{resource} found in {i}-th layer ({cache.__class__.__name__})."
                 )
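
With the per-layer chatter removed, layer-lookup detail survives only at DEBUG level. A self-contained sketch of that convention; the class shape and the KeyError fallback are assumptions, not the real multilayer cache.

import logging

logger = logging.getLogger(__name__)


class LayeredCacheSketch:
    """Illustrative stand-in for a multilayer resource cache; not the real class."""

    def __init__(self, *caches):
        self._caches = list(caches)

    def get(self, resource):
        for i, cache in enumerate(self._caches):
            if cache.contains(resource):
                # Per-layer detail stays at DEBUG so routine runs are quiet.
                logger.debug(
                    f"get:{resource} found in {i}-th layer ({cache.__class__.__name__})."
                )
                return cache.get(resource)
        raise KeyError(f"{resource} not found in any cache layer")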
