Source code for stable_datasets.timeseries.UrbanSound

import zipfile

import numpy as np
from scipy.io import arff

from stable_datasets.schema import (
    ClassLabel,
    DatasetInfo,
    DatasetSource,
    DownloadInfo,
    Features,
    Sequence,
    Value,
    Version,
)
from stable_datasets.utils import BaseDatasetBuilder


[docs] class UrbanSound(BaseDatasetBuilder): """UrbanSound timeseries classification dataset.""" VERSION = Version("1.0.0") SOURCE = DatasetSource( homepage="http://www.timeseriesclassification.com/description.php?Dataset=UrbanSound", assets={ "train": DownloadInfo(url="http://www.timeseriesclassification.com/Downloads/UrbanSound.zip"), "test": DownloadInfo(url="http://www.timeseriesclassification.com/Downloads/UrbanSound.zip"), }, citation="See dataset homepage.", ) def _info(self): return DatasetInfo( description="UrbanSound audio-derived timeseries classification dataset.", features=Features( { "series": Sequence(Sequence(Value("float32"))), "label": ClassLabel(num_classes=10), } ), supervised_keys=("series", "label"), homepage=self.SOURCE["homepage"], citation=self.SOURCE["citation"], ) def _generate_examples(self, data_path, split): with zipfile.ZipFile(data_path) as archive: member = _find_member(archive, f"UrbanSound_{split.upper()}.arff") with archive.open(member) as fh: records, meta = arff.loadarff(fh) names = meta.names() columns = np.asarray([records[name] for name in names], dtype=object) series = columns[:-1].T.astype("float32") labels = columns[-1] label_to_id = _label_to_id(labels, names[-1], meta) for idx, (x, y) in enumerate(zip(series, labels)): yield idx, {"series": x[:, None], "label": label_to_id[_label_name(y)]}
def _find_member(archive: zipfile.ZipFile, suffix: str) -> str: suffix = suffix.lower() for name in archive.namelist(): if name.lower().endswith(suffix): return name raise FileNotFoundError(f"Could not find {suffix!r} in {archive.filename}") def _label_name(value) -> str: if isinstance(value, bytes): return value.decode("utf-8") if hasattr(value, "item"): value = value.item() return str(value) def _label_to_id(labels, label_attr: str, meta) -> dict[str, int]: try: declared = [_label_name(v) for v in meta[label_attr][1]] except Exception: declared = sorted({_label_name(v) for v in labels}) return {name: idx for idx, name in enumerate(declared)}