Skip to content

Commit

Permalink
Redact storage options, as this may include user keys. (#322)
Browse files Browse the repository at this point in the history
* Redact storage options, as this may include user keys.

* Add motivational comment.
  • Loading branch information
delucchi-cmu authored May 29, 2024
1 parent e11308f commit daad9df
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
7 changes: 7 additions & 0 deletions src/hipscat_import/catalog/file_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,16 @@ def read(self, input_file, read_columns=None):
def provenance_info(self) -> dict:
    """Create dictionary of parameters for provenance tracking.

    If any `storage_options` have been provided as kwargs, we will replace the
    value with ``REDACTED`` for the purpose of writing to provenance info, as it
    may contain user names or API keys.

    The redaction is applied to a copy: the reader's own ``kwargs`` are left
    untouched so the real storage options remain usable after this call.

    Returns:
        dictionary with all argument_name -> argument_value as key -> value pairs.
    """
    # ``vars(self)`` is the live instance ``__dict__``; copy it so nothing we
    # change below leaks back into the reader.
    all_args = dict(vars(self))
    kwargs = all_args.get("kwargs")
    if isinstance(kwargs, dict) and "storage_options" in kwargs:
        # Replace the nested dict as well -- a shallow copy of ``all_args``
        # alone would still share (and therefore mutate) ``self.kwargs``.
        all_args["kwargs"] = {**kwargs, "storage_options": "REDACTED"}
    return {"input_reader_type": type(self).__name__, **all_args}

def regular_file_exists(self, input_file, storage_options: Union[Dict[Any, Any], None] = None, **_kwargs):
Expand Down
10 changes: 9 additions & 1 deletion tests/hipscat_import/catalog/test_file_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,20 @@ def test_csv_reader_provenance_info(tmp_path, basic_catalog_info):
"empty": "Int64",
"numeric": int,
},
storage_options={"user_name": "user_pii", "user_key": "SECRETS!"},
)
provenance_info = reader.provenance_info()
catalog_base_dir = os.path.join(tmp_path, "test_catalog")
catalog_base_dir = tmp_path / "test_catalog"
os.makedirs(catalog_base_dir)
io.write_provenance_info(catalog_base_dir, basic_catalog_info, provenance_info)

with open(catalog_base_dir / "provenance_info.json", "r", encoding="utf-8") as file:
data = file.read()
assert "test_catalog" in data
assert "REDACTED" in data
assert "user_pii" not in data
assert "SECRETS" not in data


def test_parquet_reader(parquet_shards_shard_44_0):
"""Verify we can read the parquet file into a single data frame."""
Expand Down

0 comments on commit daad9df

Please sign in to comment.