Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow non-ASCII characters in JSON dumps via an environment variable #3257

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
5 changes: 5 additions & 0 deletions src/zenml/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ def handle_int_env_var(var: str, default: int = 0) -> int:
"ZENML_PIPELINE_API_TOKEN_EXPIRATION"
)

# Materializers environment variables
# Name of the environment variable that, when enabled, lets the built-in
# materializers write non-ASCII characters unescaped in their JSON dumps
# (it is negated and passed as `ensure_ascii` to the JSON writer).
ENV_ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS = (
    "ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS"
)
# Backward-compatible alias: the constant was first introduced with the
# misspelling "MATERIALIAZERS". Keep that name importable so existing imports
# continue to resolve, while the user-facing variable name is spelled
# correctly.
ENV_ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS = (
    ENV_ZENML_MATERIALIZERS_ALLOW_NON_ASCII_JSON_DUMPS
)

# ZenML Server environment variables
ENV_ZENML_SERVER_PREFIX = "ZENML_SERVER_"
ENV_ZENML_SERVER_DEPLOYMENT_TYPE = f"{ENV_ZENML_SERVER_PREFIX}DEPLOYMENT_TYPE"
Expand Down
18 changes: 15 additions & 3 deletions src/zenml/materializers/built_in_materializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
)

from zenml.artifact_stores.base_artifact_store import BaseArtifactStore
from zenml.constants import (
ENV_ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS,
handle_bool_env_var,
)
from zenml.enums import ArtifactType, VisualizationType
from zenml.logger import get_logger
from zenml.materializers.base_materializer import BaseMaterializer
Expand All @@ -48,7 +52,9 @@
str,
type(None),
) # complex/bytes are not JSON serializable

# Module-level flag read from the environment variable named by
# ENV_ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS, with a default of
# False. As a module-level assignment it is evaluated once, when this module
# is imported, so later changes to the environment are not picked up.
# The materializers' `save` methods pass `not <this flag>` as `ensure_ascii`
# to `yaml_utils.write_json`, i.e. non-ASCII output is opt-in.
# NOTE(review): "MATERIALIAZERS" looks like a misspelling of "MATERIALIZERS".
# NOTE(review): with ensure_ascii=False the file writer must encode as UTF-8;
# presumably it does -- confirm against the write helper.
ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS = handle_bool_env_var(
ENV_ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS, False
)

class BuiltInMaterializer(BaseMaterializer):
"""Handle JSON-serializable basic types (`bool`, `float`, `int`, `str`)."""
Expand Down Expand Up @@ -94,7 +100,10 @@ def save(self, data: Union[bool, float, int, str]) -> None:
Args:
data: The data to store.
"""
yaml_utils.write_json(self.data_path, data)
yaml_utils.write_json(
self.data_path, data,
ensure_ascii=not ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS
)

def extract_metadata(
self, data: Union[bool, float, int, str]
Expand Down Expand Up @@ -371,7 +380,10 @@ def save(self, data: Any) -> None:

# If the data is serializable, just write it into a single JSON file.
if _is_serializable(data):
yaml_utils.write_json(self.data_path, data)
yaml_utils.write_json(
self.data_path, data,
ensure_ascii=not ZENML_MATERIALIAZERS_ALLOW_NON_ASCII_JSON_DUMPS
)
return

# non-serializable dict: Handle as non-serializable list of lists.
Expand Down
2 changes: 2 additions & 0 deletions src/zenml/utils/yaml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def write_json(
file_path: str,
contents: Any,
encoder: Optional[Type[json.JSONEncoder]] = None,
**json_dump_args
) -> None:
"""Write contents as JSON format to file_path.

Expand All @@ -143,6 +144,7 @@ def write_json(
json.dumps(
contents,
cls=encoder,
**json_dump_args
),
)

Expand Down