Internship of Baptiste

yanncalec · May 14, 2024 · 4ce97be · 4ce97be
1 parent a1081d9
commit 4ce97be
Show file tree

Hide file tree

Showing 4 changed files with 1,561 additions and 1,533 deletions.
diff --git a/dpmhm/datasets/__init__.py b/dpmhm/datasets/__init__.py
@@ -7,7 +7,6 @@
 import os
 from pathlib import Path
 import requests
-from bs4 import BeautifulSoup
 from tensorflow.data import Dataset
 
 # import pycurl
@@ -147,6 +146,8 @@ def extract_zenodo_urls(url:str) -> list:
     -------
     a list of extracted urls.
     """
+    from bs4 import BeautifulSoup
+
     header = url.split('/record/')[0]
     # Logger.debug(header)
     reqs = requests.get(url)

diff --git a/dpmhm/datasets/cwru/cwru.py b/dpmhm/datasets/cwru/cwru.py
@@ -94,17 +94,19 @@
 """
 
 # Load meta-information of all datafiles
-_METAINFO = pd.read_csv(Path(__file__).parent / 'metainfo.csv')
+# Pass `keep_default_na=False` so that the literal string `None` (e.g. for 99.mat) is kept as a string rather than being converted to NaN.
+# https://stackoverflow.com/questions/10867028/get-pandas-read-csv-to-read-empty-values-as-empty-string-instead-of-nan
+_METAINFO = pd.read_csv(Path(__file__).parent / 'metainfo.csv', keep_default_na=False)
 
 # URL to the zip file
 # _DATA_URLS = ('https://engineering.case.edu/sites/default/files/'+_METAINFO['FileName']).tolist()
 # _DATA_URLS = extract_zenodo_urls('https://sandbox.zenodo.org/record/1183527/)
 _DATA_URLS = [
     'https://sandbox.zenodo.org/record/1183527/files/cwru.zip'
+    # 'https://zenodo.org/api/records/7457149/draft/files/cwru.zip/content'
     ]
 
 
-
 class CWRU(tfds.core.GeneratorBasedBuilder):
     VERSION = tfds.core.Version('1.0.0')