Datasets updated.
yanncalec committed Jun 11, 2024
1 parent 60fe89b commit dfad63f
Showing 21 changed files with 182 additions and 99 deletions.
4 changes: 2 additions & 2 deletions dpmhm/cli.py
@@ -21,8 +21,8 @@
'mafaulda': 'Mafaulda',
'ottawa': 'Ottawa',
'paderborn': 'Paderborn',
- # 'phmap2021': 'Phmap2021',
- # 'seuc': 'SEUC',
+ 'phmap2021': 'Phmap2021',
+ 'seuc': 'SEUC',
'xjtu': 'XJTU'
}

27 changes: 13 additions & 14 deletions dpmhm/datasets/__init__.py
@@ -18,19 +18,6 @@

from .. import cli

- # from .cwru import CWRU
- # from .dcase import DCASE2021
- # from .seuc import SEUC
- # from .mfpt import MFPT
- # from .dirg import DIRG
- # from .mafaulda import MAFAULDA
- # from .ims import IMS
- # from .ottawa import Ottawa
- # from .paderborn import Paderborn
- # from .femto import FEMTO
- # from .fraunhofer import Fraunhofer205, Fraunhofer151
- # from .phmdc import Phmap2021

# Data type
_FLOAT16 = np.float16
_FLOAT32 = np.float32
@@ -163,4 +150,16 @@ def extract_zenodo_urls(url:str) -> list:
urls.append(header+'/'+s.split('?download=1')[0])
except:
pass
- return urls
\ No newline at end of file
+ return urls


+ def load_compact(ds_name:str, split:str|list, **kwargs):
+ from .transformer import DatasetCompactor
+
+ ds0 = tfds.load(ds_name, split=split)
+
+ compactor = DatasetCompactor(
+ ds0, **kwargs
+ )
+
+ return compactor
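For context: the new `load_compact` helper chains `tfds.load` with `DatasetCompactor`, and it returns the compactor instance rather than a `tf.data.Dataset`. A minimal usage sketch, assuming the dataset has already been built with tfds and that extra keyword arguments are simply forwarded to `DatasetCompactor`:

```python
from dpmhm.datasets import load_compact

# Sketch only: 'cwru' must already be prepared by tfds; any extra keyword
# arguments would be forwarded to DatasetCompactor (check its signature).
compactor = load_compact('cwru', split='train')
```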
4 changes: 1 addition & 3 deletions dpmhm/datasets/cwru/cwru.py
@@ -100,10 +100,8 @@

# URL to the zip file
# _DATA_URLS = ('https://engineering.case.edu/sites/default/files/'+_METAINFO['FileName']).tolist()
- # _DATA_URLS = extract_zenodo_urls('https://sandbox.zenodo.org/record/1183527/')
_DATA_URLS = [
-     'https://sandbox.zenodo.org/record/1183527/files/cwru.zip'
-     # 'https://zenodo.org/api/records/7457149/draft/files/cwru.zip/content'
+     'https://zenodo.org/records/7457149/files/cwru.zip?download=1'
]


7 changes: 5 additions & 2 deletions dpmhm/datasets/dirg/dirg.py
@@ -19,7 +19,7 @@
- Format: Matlab
- Channels: 6, for two accelerometers in the x-y-z axis
- Split: 'Variable speed and load' test, 'Endurance' test
- - Sampling rate: 51200 Hz for `Variable speed and load` test and 102400 Hz for `Endurance` test
+ - Sampling rate: 51200 Hz (51.2 kHz) for `Variable speed and load` test and 102400 Hz (102.4 kHz) for `Endurance` test
- Recording duration: 10 seconds for `Variable speed and load` test and 8 seconds for `Endurance` test
- Label: normal and faulty
- Size: ~ 3Gb unzipped
@@ -49,6 +49,7 @@
Notes
=====
- Conversion: load is converted from mV to N using the sensitivity factor 0.499 mV/N
+ - Only the bearing `B1` contains faults, so `B2` and `B3` are not used.
- The endurance test was originally with the fault type 4A but in the processed data we marked its label as "unknown".
"""

@@ -97,7 +98,9 @@
'6A': ('Roller', 150),
}

- _DATA_URLS = ['https://sandbox.zenodo.org/record/1183545/files/dirg.zip']
+ _DATA_URLS = [
+ 'https://zenodo.org/records/11394418/files/dirg.zip?download=1'
+ ]


class DIRG(tfds.core.GeneratorBasedBuilder):
4 changes: 0 additions & 4 deletions dpmhm/datasets/femto/femto.py
@@ -117,10 +117,6 @@
'https://github.com/Lucky-Loek/ieee-phm-2012-data-challenge-dataset/archive/refs/heads/master.zip'
]

- # _DATA_URLS = [
- #     'https://sandbox.zenodo.org/record/1183585/files/femto.zip'
- # ]

# Date of experiment
_DATE = {
'Bearing1_1': datetime(2010,12,1),
4 changes: 3 additions & 1 deletion dpmhm/datasets/fraunhofer151/fraunhofer151.py
@@ -88,7 +88,9 @@
}
"""

- _DATA_URLS = ['https://fordatis.fraunhofer.de/bitstream/fordatis/151.2/1/fraunhofer_eas_dataset_for_unbalance_detection_v1.zip']
+ _DATA_URLS = [
+ 'https://fordatis.fraunhofer.de/bitstream/fordatis/151.2/1/fraunhofer_eas_dataset_for_unbalance_detection_v1.zip'
+ ]


_RADIUS = {'0': 0., '1': 14., '2': 18.5, '3':23., '4':23.}
6 changes: 4 additions & 2 deletions dpmhm/datasets/fraunhofer205/fraunhofer205.py
@@ -102,9 +102,11 @@

_COMPONENT = ['Ball', 'InnerRace', 'OuterRace', 'None']

- _METAINFO = pd.read_csv(Path(__file__).parent/'metainfo.csv', index_col=0)
+ _METAINFO = pd.read_csv(Path(__file__).parent/'metainfo.csv', index_col=0, keep_default_na=False)

- _DATA_URLS = ['https://fordatis.fraunhofer.de/bitstream/fordatis/205/1/fraunhofer_iis_eas_dataset_vibrations_acoustic_emissions_of_drive_train_v1.zip']
+ _DATA_URLS = [
+ 'https://fordatis.fraunhofer.de/bitstream/fordatis/205/1/fraunhofer_iis_eas_dataset_vibrations_acoustic_emissions_of_drive_train_v1.zip'
+ ]


class Fraunhofer205(tfds.core.GeneratorBasedBuilder):
13 changes: 9 additions & 4 deletions dpmhm/datasets/ims/ims.py
@@ -66,7 +66,10 @@
}

# _DATA_URLS = 'https://phm-datasets.s3.amazonaws.com/NASA/4.+Bearings.zip'
- _DATA_URLS = ['https://sandbox.zenodo.org/record/1184320/files/ims.zip']
+ _DATA_URLS = [
+ # 'https://sandbox.zenodo.org/record/1184320/files/ims.zip'
+ 'https://zenodo.org/records/11545355/files/ims.zip?download=1'
+ ]

_CITATION = """
- Hai Qiu, Jay Lee, Jing Lin. “Wavelet Filter-based Weak Signature Detection Method and its Application on Roller Bearing Prognostics.” Journal of Sound and Vibration 289 (2006) 1066-1090
@@ -115,9 +118,11 @@ def _info(self) -> tfds.core.DatasetInfo:
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
def _get_split_dict(datadir):
return {
- 'dataset1': (datadir/'1st_test').glob('*'),
- 'dataset2': (datadir/'2nd_test').glob('*'),
- 'dataset3': (datadir/'3rd_test').glob('*'),
+ 'dataset1': next(datadir.rglob('1st_test')).glob('*'),
+ 'dataset2': next(datadir.rglob('2nd_test')).glob('*'),
+ 'dataset3': next(datadir.rglob('3rd_test')).glob('*'),
+ # 'dataset2': (datadir/'2nd_test').glob('*'),
+ # 'dataset3': (datadir/'3rd_test').glob('*'),
}

if dl_manager._manual_dir.exists(): # prefer to use manually downloaded data
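The switch from `datadir/'1st_test'` to `next(datadir.rglob('1st_test'))` makes the lookup tolerant of an extra wrapping directory that archive extraction may introduce. A sketch of the difference, with an illustrative path:

```python
from pathlib import Path

datadir = Path('/tmp/ims_extracted')  # illustrative extraction root

# Old form: breaks if the archive unpacked into e.g. /tmp/ims_extracted/ims/1st_test
files_old = (datadir / '1st_test').glob('*')

# New form: finds '1st_test' at any depth below datadir
# (next() raises StopIteration if no match exists).
files_new = next(datadir.rglob('1st_test')).glob('*')
```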
4 changes: 3 additions & 1 deletion dpmhm/datasets/paderborn/paderborn.py
@@ -102,7 +102,9 @@
Christian Lessmeier et al., KAt-DataCenter: mb.uni-paderborn.de/kat/datacenter, Chair of Design and Drive Technology, Paderborn University.
"""

- _METAINFO = pd.read_csv(Path(__file__).parent / 'metainfo.csv', index_col=0) # use 'Bearing Code' as index
+ # use 'Bearing Code' as index
+ # same as CWRU, use `keep_default_na` to preserve `None` as string
+ _METAINFO = pd.read_csv(Path(__file__).parent / 'metainfo.csv', index_col=0, keep_default_na=False)

# _DATA_URLS = ('http://groups.uni-paderborn.de/kat/BearingDataCenter/' + _METAINFO.index+'.rar').tolist()

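The `keep_default_na=False` flag matters here because pandas otherwise parses the literal string 'None' (among other tokens) as NaN. A minimal illustration with made-up rows, not actual metainfo.csv content:

```python
import io
import pandas as pd

csv = "Code,Fault\nKA01,None\n"  # illustrative rows only
print(pd.read_csv(io.StringIO(csv))['Fault'][0])                         # nan (float)
print(pd.read_csv(io.StringIO(csv), keep_default_na=False)['Fault'][0])  # 'None' (str)
```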
File renamed without changes.
@@ -73,7 +73,10 @@
'Bearing': ['train_1st_Bearing.csv', 'train_2nd_Bearing.csv']
}

- _DATA_URLS = ['https://sandbox.zenodo.org/record/1184362/files/phmap.zip']
+ _DATA_URLS = [
+ # 'https://sandbox.zenodo.org/record/1184362/files/phmap.zip'
+ 'https://zenodo.org/records/11546285/files/phmap.zip?download=1'
+ ]


class Phmap2021(tfds.core.GeneratorBasedBuilder):
@@ -119,31 +122,54 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
'train': self._generate_examples(datadir),
}

- def _generate_examples(self, path):
- for sp, fnames in _SPLIT_PATH_MATCH.items():
- for fn in fnames:
- fp = path / fn
-
- _signal = pd.read_csv(fp, index_col=0).T.values.astype(_DTYPE.as_numpy_dtype)
-
- metadata = {
- 'Label': sp,
- # 'OriginalSplit': sp,
- 'FileName': fp.name,
- 'Dataset': 'PHMAP2021',
- }
-
- yield hash(frozenset(metadata.items())), {
- 'signal': {'vibration': _signal},
- # 'label': sp,
- 'sampling_rate': 10544,
- 'metadata': metadata
- }
-
- @staticmethod
- def get_references():
- try:
- with open(Path(__file__).parent / 'Exported Items.bib') as fp:
- return fp.read()
- except:
- pass
+ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+ def _get_split_dict(datadir):
+ # This doesn't work:
+ # return {sp: (datadir/fn).rglob('*.csv') for sp, fn in _SPLIT_PATH_MATCH.items()}
+ return {
+ 'train': datadir.rglob('*.csv'),
+ }
+
+ if dl_manager._manual_dir.exists(): # prefer to use manually downloaded data
+ datadir = Path(dl_manager._manual_dir)
+ elif dl_manager._extract_dir.exists(): # automatically downloaded & extracted data
+ datadir = Path(dl_manager._extract_dir)
+ # elif dl_manager._download_dir.exists(): # automatically downloaded data
+ # datadir = Path(dl_manager._download_dir)
+ # tfds.download.iter_archive(fp, tfds.download.ExtractMethod.ZIP)
+ else:
+ raise FileNotFoundError()
+
+ return {sp: self._generate_examples(files) for sp, files in _get_split_dict(datadir).items()}
+
+ # def _generate_examples(self, path):
+ # for sp, fnames in _SPLIT_PATH_MATCH.items():
+ def _generate_examples(self, files):
+ for fp in files:
+ # for fn in fnames:
+ # fp = path / fn
+
+ _signal = pd.read_csv(fp, index_col=0).T.values.astype(_DTYPE)
+ sp = fp.stem.split('_')[-1].capitalize()
+
+ metadata = {
+ 'Label': sp,
+ # 'OriginalSplit': sp,
+ 'FileName': fp.name,
+ 'Dataset': 'PHMAP2021',
+ }
+
+ yield hash(frozenset(metadata.items())), {
+ 'signal': {'vibration': _signal},
+ # 'label': sp,
+ 'sampling_rate': 10544,
+ 'metadata': metadata
+ }
+
+ # @staticmethod
+ # def get_references():
+ # try:
+ # with open(Path(__file__).parent / 'Exported Items.bib') as fp:
+ # return fp.read()
+ # except:
+ # pass
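In the rewritten `_generate_examples`, the label is recovered from the file name instead of from the loop over `_SPLIT_PATH_MATCH`. For one of the names listed in `_SPLIT_PATH_MATCH`, the expression behaves as follows:

```python
from pathlib import Path

fp = Path('train_1st_Bearing.csv')  # a file name from _SPLIT_PATH_MATCH
label = fp.stem.split('_')[-1].capitalize()
print(label)  # 'Bearing'
```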
File renamed without changes.
File renamed without changes.
@@ -67,10 +67,7 @@
```
"""

- # import os
- # import pathlib
- # import itertools
- # import json
+ from pathlib import Path
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
@@ -89,7 +86,9 @@
doi={10.1109/TII.2018.2864759}}
"""

- _DATA_URLs = 'https://github.com/cathysiyu/Mechanical-datasets/archive/refs/heads/master.zip'
+ _DATA_URLS = [
+ 'https://github.com/cathysiyu/Mechanical-datasets/archive/refs/heads/master.zip'
+ ]

# Components of fault
_FAULT_GEARBOX = ['Chipped', 'Missing', 'Root', 'Surface']
@@ -162,22 +161,27 @@ def _fname_parser(cls, fname):
return _component, _load

def _split_generators(self, dl_manager: tfds.download.DownloadManager):
+ def _get_split_dict(datadir):
+ return {
+ 'gearbox': next(datadir.rglob('gearset')).rglob('*.csv'),
+ 'bearing': next(datadir.rglob('bearingset')).rglob('*.csv'),
+ }
+
if dl_manager._manual_dir.exists(): # prefer to use manually downloaded data
- datadir = dl_manager._manual_dir / 'gearbox'
- else: # automatically download data
- datadir = list(dl_manager.download_and_extract(_DATA_URLs).iterdir())[0] / 'gearbox'
- # print(datadir)
-
- return {
- # Use the original splits
- 'gearbox': self._generate_examples(datadir/'gearset'),
- 'bearing': self._generate_examples(datadir/'bearingset'),
- # 'train': self._generate_examples(datadir) # this will rewrite on precedent splits
- }
-
- def _generate_examples(self, path):
- # !! Recursive glob `path.rglob` may not behave as expected
- for fp in path.glob('*.csv'):
+ datadir = Path(dl_manager._manual_dir)
+ elif dl_manager._extract_dir.exists(): # automatically downloaded & extracted data
+ datadir = Path(dl_manager._extract_dir)
+ # elif dl_manager._download_dir.exists(): # automatically downloaded data
+ # datadir = Path(dl_manager._download_dir)
+ # tfds.download.iter_archive(fp, tfds.download.ExtractMethod.ZIP)
+ else:
+ raise FileNotFoundError()
+
+ return {sp: self._generate_examples(files) for sp, files in _get_split_dict(datadir).items()}
+
+
+ def _generate_examples(self, files):
+ for fp in files:
_component, _load = self._fname_parser(fp.name)
# try:
# df = pd.read_csv(fp,skiprows=15, sep='\t').iloc[:,:-1]
Expand All @@ -187,11 +191,11 @@ def _generate_examples(self, path):
# df = pd.read_csv(fp,skiprows=15, sep=',').iloc[:,:-1]
# if df.shape[1] != 8:
# raise Exception
- df = pd.read_csv(fp,skiprows=15, sep=None, engine='python').iloc[:,:-1]
+ df = pd.read_csv(fp, skiprows=15, sep=None, engine='python').iloc[:,:-1]
if df.shape[1] != 8:
raise Exception()

- _signal = df.T.values.astype(_DTYPE.as_numpy_dtype) # strangely, df.values.T will give a tuple
+ _signal = df.T.values.astype(_DTYPE) # strangely, df.values.T will give a tuple

metadata = {
'LoadForce': _load,
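`sep=None` with `engine='python'` lets pandas sniff the delimiter per file, which replaces the commented-out attempts at reading with tab and comma separators in turn. A self-contained sketch:

```python
import io
import pandas as pd

# The same call handles tab- or comma-separated content:
for text in ("a\tb\n1\t2\n", "a,b\n1,2\n"):
    df = pd.read_csv(io.StringIO(text), sep=None, engine='python')
    print(df.columns.tolist())  # ['a', 'b'] in both cases
```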