Merge branch 'pytorch_implementation' into inference-no-dropout

mehta-lab · May 30, 2023 · be88e9b · be88e9b
2 parents b0fe3bd + 00375b0
commit be88e9b
Showing 1 changed file with 58 additions and 1 deletion.
diff --git a/micro_dl/data_organization.md b/micro_dl/data_organization.md
@@ -38,7 +38,7 @@ Currently computing the evaluation metrics does not depend on PyTorch.
 
 For run-time deployment, we export the model to the ONNX format.
 
-## Data hierarchy
+## Data hierarchy (Lightning Framework)
 
 Data generated by the pipeline is stored on the file system following this schema:
 
@@ -112,3 +112,60 @@ virtual_staining:
             ...
         ...
 ```
+
+## Data hierarchy (Gunpowder Framework) 
+This data hierarchy is deprecated and is documented for archival purposes only.
+The hierarchy organizes the subdirectories for config files, models/training logs, and data first by the dataset used in the computational experiment, and then by the specific experiment. Each set of config files should have a corresponding sibling-level training log and a parent-level dataset in their respective directories.
+```yaml
+# project root directory 
+torch_microDL:
+
+    # training and test data
+    data:
+        <yyyy_mm_dd>_<data_name>: # data-level sibling to dataset dir in config files
+            # Due to evolving data format, no single standard for each dataset's format
+            # Generally these directories are populated by one of the following:
+            #   tile directories (from old preprocessing)
+            #   single page tiff directories (from old raw data)
+            #   zarr stores (from new dataloading)
+            <data_directory1>:
+            <data_directory2>:
+            ...
+        ...
+
+    # configuration files (preprocessing, training, inference, etc)
+    config_files:
+        <yyyy_mm_dd>_<data_name>: # data_name is often an abbreviated tag to the microscopy experiment sourcing this data
+            <mm_dd_yyyy>_<experiment_name>: # config files often stored under additional subdirectories. No standard format
+                config0_<config_type>.yml
+                config1_<config_type>.yml
+                ...
+            ... 
+        ... 
+
+    # training logs and saved models
+    models:
+        <yyyy_mm_dd>_<data_name>: # data-level sibling to dataset dir in config files
+            <mm_dd_yyyy>_<experiment_name>:
+                model_<model_type>0:
+                    # Sometimes there is an additional subdirectory here delineating different runs.
+                    # There are often *many* training models. These should be cleaned or sorted by size.
+                    training_model_<yyyy_mm_dd_mm_ss>:
+                        <tensorboard_logs>
+                        data_splits.yml
+                        saved_model_ep_<ep0>_testloss_<loss>.pt
+                        saved_model_ep_<ep1>_testloss_<loss>.pt
+                        ...
+                        prediction_ep_<ep0>.png
+                        prediction_ep_<ep1>.png
+                        ...
+                    inference_results_<yyyy_mm_dd_mm_ss>:
+                        <tensorboard_logs>
+                        <inference_result_1>.tiff
+                        <inference_result_2>.tiff
+                        ...
+                ...
+            ...
+        ...
+
+```