diff --git a/docs/local_filesystem_setup.rst b/docs/local_filesystem_setup.rst
new file mode 100644
index 00000000..8a26fb69
--- /dev/null
+++ b/docs/local_filesystem_setup.rst
@@ -0,0 +1,45 @@
+Setting up experiment storage and database in local filesystem
+==============================================================
+
+This page describes how to set up StudioML to use the
+local filesystem for storing experiment artifacts and meta-data.
+With this option, there is no need to set up any external
+connection to S3/Minio/GCS etc.
+
+StudioML configuration
+----------------------
+
+::
+
+    "studio_ml_config": {
+
+        ...
+
+        "database": {
+            "type": "local",
+            "endpoint": SOME_DB_LOCAL_PATH,
+            "bucket": DB_BUCKET_NAME,
+            "authentication": "none"
+        },
+        "storage": {
+            "type": "local",
+            "endpoint": SOME_ARTIFACTS_LOCAL_PATH,
+            "bucket": ARTIFACTS_BUCKET_NAME,
+        }
+
+        ...
+    }
+
+
+With the StudioML database type set to "local",
+all experiment meta-data will be stored locally under the
+directory SOME_DB_LOCAL_PATH/DB_BUCKET_NAME.
+Similarly, with the storage type set to "local",
+all experiment artifacts will be stored locally under the
+directory SOME_ARTIFACTS_LOCAL_PATH/ARTIFACTS_BUCKET_NAME.
+
+Note: if you are using "local" mode, it is recommended to use it
+for both the storage and the database configuration.
+However, it is technically possible to mix them: for example, a local
+storage configuration with an S3-based database configuration.
+
diff --git a/studio/artifact_store.py b/studio/artifact_store.py
index 9710403e..c21de347 100644
--- a/studio/artifact_store.py
+++ b/studio/artifact_store.py
@@ -1,8 +1,8 @@
 from .firebase_artifact_store import FirebaseArtifactStore
 from .gcloud_artifact_store import GCloudArtifactStore
+from .local_artifact_store import LocalArtifactStore
 from .s3_artifact_store import S3ArtifactStore
 
-
 def get_artifact_store(config, blocking_auth=True, verbose=10):
     if config['type'].lower() == 'firebase':
         return FirebaseArtifactStore(
@@ -11,5 +11,7 @@ def get_artifact_store(config, blocking_auth=True, verbose=10):
         return GCloudArtifactStore(config, verbose=verbose)
     elif config['type'].lower() == 's3':
         return S3ArtifactStore(config, verbose=verbose)
+    elif config['type'].lower() == 'local':
+        return LocalArtifactStore(config, verbose=verbose)
     else:
         raise ValueError('Unknown storage type: ' + config['type'])
diff --git a/studio/local_artifact_store.py b/studio/local_artifact_store.py
new file mode 100644
index 00000000..4a06f589
--- /dev/null
+++ b/studio/local_artifact_store.py
@@ -0,0 +1,91 @@
+import calendar
+import os
+import shutil
+
+from .tartifact_store import TartifactStore
+
+
+class LocalArtifactStore(TartifactStore):
+    """Artifact store that keeps artifacts in a local directory tree.
+
+    An artifact with a given key is the file <endpoint>/<bucket>/<key>,
+    where 'endpoint' must name an existing directory.
+    """
+
+    def __init__(self, config,
+                 bucket_name=None,
+                 verbose=10,
+                 measure_timestamp_diff=False,
+                 compression=None):
+        """Resolve the store root and create the bucket directory.
+
+        Raises ValueError if the endpoint is not an existing directory
+        or if no bucket name is available.
+        """
+        if compression is None:
+            compression = config.get('compression')
+
+        self.endpoint = config.get('endpoint', '~')
+        self.store_root = os.path.realpath(os.path.expanduser(self.endpoint))
+        if not os.path.isdir(self.store_root):
+            raise ValueError(
+                "Local store root {} doesn't exist or not a directory!"
+                .format(self.store_root))
+
+        # An explicit bucket_name takes precedence over the configured one.
+        self.bucket = bucket_name
+        if self.bucket is None:
+            self.bucket = config.get('bucket')
+        if self.bucket is None:
+            raise ValueError("Local store bucket name is not specified!")
+        self.store_root = os.path.join(self.store_root, self.bucket)
+        # Create the bucket directory itself, not just its parent.
+        os.makedirs(self.store_root, exist_ok=True)
+
+        super(LocalArtifactStore, self).__init__(
+            measure_timestamp_diff,
+            compression=compression,
+            verbose=verbose)
+
+    def _ensure_path_dirs_exist(self, path):
+        """Create the parent directories of file 'path' if missing."""
+        dirs = os.path.dirname(path)
+        # A bare file name has no directory part; makedirs('') would raise.
+        if dirs:
+            os.makedirs(dirs, mode=0o777, exist_ok=True)
+
+    def _upload_file(self, key, local_path):
+        """Copy the file at 'local_path' into the store under 'key'."""
+        target_path = os.path.join(self.store_root, key)
+        self._ensure_path_dirs_exist(target_path)
+        shutil.copyfile(local_path, target_path)
+
+    def _download_file(self, key, local_path, bucket=None):
+        """Copy the artifact stored under 'key' to 'local_path'."""
+        source_path = os.path.join(self.store_root, key)
+        self._ensure_path_dirs_exist(local_path)
+        shutil.copyfile(source_path, local_path)
+
+    def _delete_file(self, key):
+        """Remove the artifact file stored under 'key'."""
+        os.remove(os.path.join(self.store_root, key))
+
+    def _get_file_url(self, key, method='GET'):
+        """Return the local file path for 'key'; 'method' is ignored."""
+        return str(os.path.join(self.store_root, key))
+
+    def _get_file_post(self, key):
+        """Return the local file path for 'key'."""
+        return str(os.path.join(self.store_root, key))
+
+    def _get_file_timestamp(self, key):
+        """Return None: no timestamp is reported for local artifacts."""
+        return None
+
+    def get_qualified_location(self, key):
+        """Return the 'file:/'-prefixed location string for 'key'."""
+        return 'file:/' + self.store_root + '/' + key
+
+    def get_bucket(self):
+        """Return the bucket name used by this store."""
+        return self.bucket
diff --git a/studio/local_db_provider.py b/studio/local_db_provider.py
new file mode 100644
index 00000000..3d95580a
--- /dev/null
+++ b/studio/local_db_provider.py
@@ -0,0 +1,74 @@
+import os
+import json
+
+from .keyvalue_provider import KeyValueProvider
+from .local_artifact_store import LocalArtifactStore
+
+
+class LocalDbProvider(KeyValueProvider):
+    """Key-value provider that keeps every value in a local JSON file.
+
+    The value for a given key is the file <endpoint>/<bucket>/<key>,
+    where 'endpoint' must name an existing directory.
+    """
+
+    def __init__(self, config, blocking_auth=True, verbose=10, store=None):
+        """Resolve the database root and create the bucket directory.
+
+        Raises ValueError if the endpoint is not an existing directory.
+        """
+        self.config = config
+        # Read 'bucket' once, with its default: reading it again without
+        # the default would yield None and break os.path.join() below.
+        self.bucket = config.get('bucket', 'studioml-meta')
+
+        self.endpoint = config.get('endpoint', '~')
+        self.db_root = os.path.realpath(os.path.expanduser(self.endpoint))
+        if not os.path.isdir(self.db_root):
+            raise ValueError(
+                "Local DB root {} doesn't exist or not a directory!"
+                .format(self.db_root))
+
+        self.db_root = os.path.join(self.db_root, self.bucket)
+        # Create the bucket directory itself, not just its parent.
+        os.makedirs(self.db_root, exist_ok=True)
+
+        super(LocalDbProvider, self).__init__(
+            config,
+            blocking_auth,
+            verbose,
+            store)
+
+    def _ensure_path_dirs_exist(self, path):
+        """Create the parent directories of file 'path' if missing."""
+        dirs = os.path.dirname(path)
+        # A bare file name has no directory part; makedirs('') would raise.
+        if dirs:
+            os.makedirs(dirs, mode=0o777, exist_ok=True)
+
+    def _get(self, key, shallow=False):
+        """Return the JSON value stored for 'key', or None if absent.
+
+        'shallow' is accepted but not used.
+        """
+        file_name = os.path.join(self.db_root, key)
+        try:
+            with open(file_name) as infile:
+                return json.load(infile)
+        except (FileNotFoundError, NotADirectoryError):
+            return None
+
+    def _delete(self, key):
+        """Remove the file stored for 'key'; a missing file is ignored."""
+        file_name = os.path.join(self.db_root, key)
+        try:
+            os.remove(file_name)
+        except (FileNotFoundError, NotADirectoryError):
+            pass
+
+    def _set(self, key, value):
+        """Serialize 'value' as JSON into the file for 'key'."""
+        file_name = os.path.join(self.db_root, key)
+        self._ensure_path_dirs_exist(file_name)
+        with open(file_name, 'w') as outfile:
+            json.dump(value, outfile)
diff --git a/studio/model.py b/studio/model.py
index 88eda913..7dd4af71 100644
--- a/studio/model.py
+++ b/studio/model.py
@@ -14,6 +14,8 @@
 from .artifact_store import get_artifact_store
 from .http_provider import HTTPProvider
 from .firebase_provider import FirebaseProvider
+from .local_artifact_store import LocalArtifactStore
+from .local_db_provider import LocalDbProvider
 from .s3_provider import S3Provider
 from .gs_provider import GSProvider
 from .model_setup import setup_model
@@ -102,6 +104,16 @@ def get_db_provider(config=None, blocking_auth=True):
                                  blocking_auth=blocking_auth)
         artifact_store = db_provider.get_artifact_store()
 
+    elif db_config['type'].lower() == 'local':
+        if artifact_store is None:
+            artifact_store = LocalArtifactStore(db_config, "storage", verbose)
+
+        db_provider = LocalDbProvider(db_config,
+                                      verbose=verbose,
+                                      store=artifact_store,
+                                      blocking_auth=blocking_auth)
+        artifact_store = db_provider.get_artifact_store()
+
     else:
         _model_setup = None
         raise ValueError('Unknown type of the database ' + db_config['type'])