Add script, notebook, and minor folder-structure debugging when calling some functions
AdrienC21 · Sep 18, 2023 · da710be · da710be
1 parent 7c5680a
commit da710be
Show file tree

Hide file tree

Showing 12 changed files with 1,591 additions and 12 deletions.
diff --git a/.github/workflows/apply_fixes.py b/.github/workflows/apply_fixes.py
@@ -12,7 +12,7 @@
 packages = site.getsitepackages()
 site_packages = None
 for p in packages:
-    if "site-packages" in p:
+    if ("dist-packages" in p) or ("site-packages" in p):
         site_packages = p
         break
 

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,13 @@
 Changelog
 ==================================
 
+0.3.1 (2023/09/18)
+--------------------
+
+- Debug folder structure when calling some functions
+
+- Add example script and notebook
+
 0.3.0 (2023/09/18)
 --------------------
 

diff --git a/ccsd/__init__.py b/ccsd/__init__.py
@@ -5,6 +5,6 @@
 
 __author__ = "Adrien Carrel"
 __email__ = "[email protected]"
-__version__ = "0.3.0"
+__version__ = "0.3.1"
 
 __all__ = ["src", "data"]
diff --git a/ccsd/diffusion.py b/ccsd/diffusion.py
@@ -104,7 +104,7 @@ def run(self) -> None:
             ValueError: raise an error if the experiment type is not one of [train, sample].
         """
         # Get the configuration and the general configuration
-        config = get_config(self.args.config, self.args.seed)
+        config = get_config(self.args.config, self.args.seed, self.args.folder)
         general_config = get_general_config()
 
         # Print the initial message

diff --git a/ccsd/src/parsers/config.py b/ccsd/src/parsers/config.py
@@ -12,17 +12,18 @@
 from easydict import EasyDict
 
 
-def get_config(config: str, seed: int) -> EasyDict:
+def get_config(config: str, seed: int, folder: str = "./") -> EasyDict:
     """Load the config file.
 
     Args:
         config (str): name of the config file.
         seed (int): random seed (to be added to the config object).
+        folder (str, optional): folder where the config folder is located. Defaults to "./".
 
     Returns:
         EasyDict: configuration object.
     """
-    config_dir = os.path.join("config", f"{config}.yaml")
+    config_dir = os.path.join(folder, "config", f"{config}.yaml")
     config = EasyDict(yaml.load(open(config_dir, "r"), Loader=yaml.FullLoader))
     config.seed = seed
 

diff --git a/ccsd/src/sampler.py b/ccsd/src/sampler.py
@@ -1156,7 +1156,9 @@ def sample(self) -> None:
         logger.log(f"GEN SEED: {self.config.sample.seed}")
         load_seed(self.config.sample.seed)
 
-        train_smiles, test_smiles = load_smiles(self.configt.data.data)
+        train_smiles, test_smiles = load_smiles(
+            self.configt.data.data, self.config.folder
+        )
         train_smiles, test_smiles = canonicalize_smiles(
             train_smiles
         ), canonicalize_smiles(test_smiles)

diff --git a/ccsd/src/utils/mol_utils.py b/ccsd/src/utils/mol_utils.py
@@ -105,11 +105,14 @@ def canonicalize_smiles(smiles: List[str]) -> List[str]:
     return [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in smiles]
 
 
-def load_smiles(dataset: str = "QM9") -> Tuple[List[str], List[str]]:
+def load_smiles(
+    dataset: str = "QM9", folder: str = "./"
+) -> Tuple[List[str], List[str]]:
     """Loads SMILES strings from a dataset and return train and test splits.
 
     Args:
         dataset (str, optional): smiles dataset to load. Defaults to "QM9".
+        folder (str, optional): folder where the data folder is located. Defaults to "./".
 
     Raises:
         ValueError: raise an error if dataset is not supported
@@ -124,9 +127,9 @@ def load_smiles(dataset: str = "QM9") -> Tuple[List[str], List[str]]:
     else:
         raise ValueError(f"Wrong dataset name {dataset} in load_smiles")
 
-    df = pd.read_csv(os.path.join("data", f"{dataset.lower()}.csv"))
+    df = pd.read_csv(os.path.join(folder, "data", f"{dataset.lower()}.csv"))
 
-    with open(os.path.join("data", f"valid_idx_{dataset.lower()}.json")) as f:
+    with open(os.path.join(folder, "data", f"valid_idx_{dataset.lower()}.json")) as f:
         test_idx = json.load(f)
 
     if dataset == "QM9":  # special case for QM9

diff --git a/config/general_config.yaml b/config/general_config.yaml
@@ -5,5 +5,5 @@ project_name: "CCSD"  # name of the project in wandb
 entity: "a-carrel"  # name of the entity in wandb
 timezone: "Europe/London"  # timezone to name the output files
 print_initial: True  # print an initial message with logo and current experiment
-plotly_fig: False  # if True, create plotly figures (rotating 3D plots, diffusion animation, etc)
+plotly_fig: True  # if True, create plotly figures (rotating 3D plots, diffusion animation, etc)
 engine: "kaleido"  # engine for the plotly plots. Windows users should use "kaleido" instead of "orca"
diff --git a/config/sample_qm9_CC demonstration.yaml b/config/sample_qm9_CC demonstration.yaml
@@ -26,7 +26,7 @@ sampler:
   n_steps: 1
 
 sample:
-  # divide_batch: 4  # optional, only if RAM issue occurs
+  divide_batch: 4  # optional, only if RAM issue occurs
   n_samples: 16  # param only for mol datasets
   cc_nb_eval: 1000  # param only for cc datasets
   use_ema: False

diff --git a/main.py b/main.py
@@ -41,7 +41,7 @@ def main(args: argparse.Namespace) -> None:
     """
 
     # Get the configuration and the general configuration
-    config = get_config(args.config, args.seed)
+    config = get_config(args.config, args.seed, args.folder)
     general_config = get_general_config()
 
     # Print the initial message