Source code for stable_datasets.timeseries.seizures_neonatal

from pathlib import Path

import numpy as np

from stable_datasets.schema import DatasetInfo, DatasetSource, DownloadInfo, Features, Sequence, Value, Version
from stable_datasets.splits import Split, SplitGenerator
from stable_datasets.utils import BaseDatasetBuilder, bulk_download


class SeizuresNeonatal(BaseDatasetBuilder):
    """Neonatal EEG recordings with expert seizure annotations.

    The builder downloads one annotation file (``annotations_2017.mat``) plus
    one EDF recording per subject (``eeg1.edf`` ... ``eeg79.edf``) and exposes
    them as a single ``train`` split with one example per recording.
    """

    VERSION = Version("1.0.0")

    # Number of recordings published as eeg1.edf ... eeg<N>.edf.  Kept in one
    # place so that the asset table, the download list and the example
    # generator cannot drift apart.
    _NUM_SUBJECTS = 79

    SOURCE = DatasetSource(
        homepage="https://zenodo.org/records/2547147",
        assets={
            "annotations": DownloadInfo(
                url="https://zenodo.org/record/2547147/files/annotations_2017.mat?download=1",
                fallbacks=["https://zenodo.org/records/2547147/files/annotations_2017.mat"],
                filename="annotations_2017.mat",
            ),
            # The first iterable of a comprehension is evaluated in class
            # scope, so ``_NUM_SUBJECTS`` is visible here.
            **{
                f"eeg{i}": DownloadInfo(
                    url=f"https://zenodo.org/record/2547147/files/eeg{i}.edf?download=1",
                    fallbacks=[f"https://zenodo.org/records/2547147/files/eeg{i}.edf"],
                    filename=f"eeg{i}.edf",
                )
                for i in range(1, _NUM_SUBJECTS + 1)
            },
        },
        citation="See dataset homepage.",
    )

    def _info(self):
        """Return the dataset metadata and the feature schema of one example."""
        return DatasetInfo(
            description="Multichannel neonatal EEG recordings with expert seizure annotations.",
            features=Features(
                {
                    "series": Sequence(Sequence(Value("float32"))),
                    "annotations": Sequence(Sequence(Value("int32"))),
                    "subject_id": Value("int32"),
                    "filename": Value("string"),
                }
            ),
            supervised_keys=None,
            homepage=self.SOURCE["homepage"],
            citation=self.SOURCE["citation"],
        )

    def _split_generators(self):
        """Download every asset and describe the single ``train`` split.

        Returns:
            A one-element list holding the ``Split.TRAIN`` generator, whose
            ``gen_kwargs`` carry the local annotation path and the list of
            local EEG paths.
        """
        source = self._source()
        asset_names = [
            "annotations",
            *[f"eeg{i}" for i in range(1, self._NUM_SUBJECTS + 1)],
        ]
        local_paths = bulk_download(
            [self._normalize_download_info(source["assets"][name], asset_name=name) for name in asset_names],
            dest_folder=self._raw_download_dir,
        )

        # NOTE(review): relies on bulk_download returning paths in the same
        # order as the requests, with the annotation file first -- confirm.
        annotations_path = local_paths[0]
        eeg_paths = local_paths[1:]

        return [
            SplitGenerator(
                name=Split.TRAIN,
                gen_kwargs={"annotations_path": annotations_path, "eeg_paths": eeg_paths, "split": "train"},
            )
        ]

    def _candidate_splits(self) -> list:
        """Return the splits this builder can produce (only ``Split.TRAIN``)."""
        return [Split.TRAIN]

    def _generate_examples(self, annotations_path, eeg_paths, split):
        """Yield one ``(subject_id, example)`` pair per recording.

        Args:
            annotations_path: Local path of the MATLAB annotation file; its
                ``"annotat_new"`` variable is read.
            eeg_paths: Local paths of the EDF recordings, named
                ``eeg<subject_id>.edf``.
            split: Unused; there is a single split.

        Yields:
            ``(subject_id, example)`` where ``example`` holds the float32
            signal array (``series``), the int32 annotations as nested lists
            (``annotations``), the ``subject_id`` and the EDF ``filename``.

        Raises:
            KeyError: If the recording of one of the expected subjects is
                missing from ``eeg_paths``.
        """
        del split

        # Imported lazily so the module can be imported without these
        # optional heavy dependencies installed.
        import mne
        from scipy.io import loadmat

        # Element ``k`` of the first row is used as the annotation of
        # subject ``k + 1``.
        annotations = loadmat(annotations_path)["annotat_new"][0]

        # Index the recordings by the number embedded in the file name so the
        # result does not depend on the order of ``eeg_paths``.
        eeg_by_id = {int(Path(path).stem.replace("eeg", "")): path for path in eeg_paths}

        for subject_id in range(1, self._NUM_SUBJECTS + 1):
            eeg_path = eeg_by_id[subject_id]
            raw = mne.io.read_raw_edf(str(eeg_path), preload=True, verbose="ERROR")

            # MNE returns (n_channels, n_times); transpose so that time is
            # the leading axis.
            series = raw.get_data().T.astype("float32")

            annotation = np.asarray(annotations[subject_id - 1]).astype("int32")
            if annotation.ndim == 1:
                # Promote a flat vector to a column so the value is always
                # two-dimensional, as the feature schema declares.
                annotation = annotation[:, None]

            yield (
                subject_id,
                {
                    "series": series,
                    "annotations": annotation.tolist(),
                    "subject_id": subject_id,
                    "filename": Path(eeg_path).name,
                },
            )