diff --git a/multimolecule/datasets/eternabench_external/eternabench_external.py b/multimolecule/datasets/eternabench_external/eternabench_external.py index 6e21af27..2e6f56a8 100644 --- a/multimolecule/datasets/eternabench_external/eternabench_external.py +++ b/multimolecule/datasets/eternabench_external/eternabench_external.py @@ -48,8 +48,7 @@ def convert_dataset_(df: pd.DataFrame): def convert_dataset(convert_config): df = dl.load_pandas(convert_config.dataset_path) - fd = convert_dataset_(df) - save_dataset(convert_config, {"test": fd}) + save_dataset(convert_config, convert_dataset_(df), filename="test.parquet") class ConvertConfig(ConvertConfig_): diff --git a/multimolecule/datasets/rnacentral/README.md b/multimolecule/datasets/rnacentral/README.md index faa6e2f5..97587986 100644 --- a/multimolecule/datasets/rnacentral/README.md +++ b/multimolecule/datasets/rnacentral/README.md @@ -93,14 +93,14 @@ This is an UNOFFICIAL release of the [RNAcentral](https://rnacentral.org) by the ## Variations -This dataset is available in five variants: +This dataset is available in five additional variants: - [rnacentral](https://huggingface.co/datasets/multimolecule/rnacentral): The main RNAcentral dataset. -- [rnacentral-512](https://huggingface.co/datasets/multimolecule/rnacentral-1024): RNAcentral dataset with all sequences truncated to 512 nucleotides. -- [rnacentral-1024](https://huggingface.co/datasets/multimolecule/rnacentral-1024): RNAcentral dataset with all sequences truncated to 1024 nucleotides. -- [rnacentral-2048](https://huggingface.co/datasets/multimolecule/rnacentral-2048): RNAcentral dataset with all sequences truncated to 2048 nucleotides. -- [rnacentral-4096](https://huggingface.co/datasets/multimolecule/rnacentral-4096): RNAcentral dataset with all sequences truncated to 4096 nucleotides. -- [rnacentral-8192](https://huggingface.co/datasets/multimolecule/rnacentral-8192): RNAcentral dataset with all sequences truncated to 8192 nucleotides. +- [rnacentral.512](https://huggingface.co/datasets/multimolecule/rnacentral.512): RNAcentral dataset with all sequences truncated to 512 nucleotides. +- [rnacentral.1024](https://huggingface.co/datasets/multimolecule/rnacentral.1024): RNAcentral dataset with all sequences truncated to 1024 nucleotides. +- [rnacentral.2048](https://huggingface.co/datasets/multimolecule/rnacentral.2048): RNAcentral dataset with all sequences truncated to 2048 nucleotides. +- [rnacentral.4096](https://huggingface.co/datasets/multimolecule/rnacentral.4096): RNAcentral dataset with all sequences truncated to 4096 nucleotides. +- [rnacentral.8192](https://huggingface.co/datasets/multimolecule/rnacentral.8192): RNAcentral dataset with all sequences truncated to 8192 nucleotides. ## Derived Datasets diff --git a/multimolecule/datasets/rnacentral/rnacentral.py b/multimolecule/datasets/rnacentral/rnacentral.py index 958e622a..6943dfdb 100644 --- a/multimolecule/datasets/rnacentral/rnacentral.py +++ b/multimolecule/datasets/rnacentral/rnacentral.py @@ -49,7 +49,7 @@ class ConvertConfig(ConvertConfig_): def post(self): if self.max_seq_len is not None: - self.output_path = f"{self.output_path}-{self.max_seq_len}" + self.output_path = f"{self.output_path}.{self.max_seq_len}" super().post()