"""Source code for stable_datasets.timeseries.audiomnist."""

import zipfile

from stable_datasets.schema import (
    ClassLabel,
    DatasetInfo,
    DatasetSource,
    DownloadInfo,
    Features,
    Sequence,
    Value,
    Version,
)
from stable_datasets.utils import BaseDatasetBuilder

from ._audio_utils import wav_bytes_to_series



# [docs] -- link marker left over from the rendered documentation page.
class AudioMNIST(BaseDatasetBuilder):
    """AudioMNIST spoken-digit classification dataset.

    Examples are read directly from the zip archive declared in ``SOURCE``.
    Every ``.wav`` member whose base name has the form
    ``<digit>_<speaker>_<utterance>.wav`` (three integer fields) becomes one
    example with the following features:

    - ``series``: the value returned by ``wav_bytes_to_series`` for the
      member's raw bytes (declared as a nested sequence of ``float32``).
    - ``label``: the digit field (``ClassLabel`` with 10 classes).
    - ``speaker_id``: the speaker field minus one.
    - ``utterance_id``: the utterance field.
    - ``filename``: the member's base name, which is also the example key.

    Only a single ``"train"`` asset is declared.
    """

    VERSION = Version("1.0.0")
    SOURCE = DatasetSource(
        homepage="https://github.com/soerenab/AudioMNIST",
        assets={
            "train": DownloadInfo(
                url="https://github.com/soerenab/AudioMNIST/archive/master.zip",
                filename="AudioMNIST-master.zip",
            ),
        },
        citation="""@article{audiomnist2023,
                     title = {AudioMNIST: Exploring Explainable Artificial Intelligence for audio analysis on a simple benchmark},
                     journal = {Journal of the Franklin Institute},
                     year = {2023},
                     issn = {0016-0032},
                     doi = {https://doi.org/10.1016/j.jfranklin.2023.11.038},
                     url = {https://www.sciencedirect.com/science/article/pii/S0016003223007536},
                     author = {Sören Becker and Johanna Vielhaben and Marcel Ackermann and Klaus-Robert Müller and Sebastian Lapuschkin and Wojciech Samek},
                     keywords = {Deep learning, Neural networks, Interpretability, Explainable artificial intelligence, Audio classification, Speech recognition}}""",
    )

    def _info(self):
        """Return the dataset metadata.

        Returns:
            A ``DatasetInfo`` carrying the description, the feature schema,
            the ``("series", "label")`` supervised keys, and the homepage and
            citation taken from ``SOURCE``.
        """
        return DatasetInfo(
            description="AudioMNIST spoken-digit recordings with speaker metadata.",
            features=Features(
                {
                    # NOTE(review): nested sequence, presumably one inner
                    # sequence per channel or per time step -- confirm against
                    # ``wav_bytes_to_series``.
                    "series": Sequence(Sequence(Value("float32"))),
                    "label": ClassLabel(num_classes=10),
                    "speaker_id": Value("int32"),
                    "utterance_id": Value("int32"),
                    "filename": Value("string"),
                }
            ),
            supervised_keys=("series", "label"),
            homepage=self.SOURCE["homepage"],
            citation=self.SOURCE["citation"],
        )

    def _generate_examples(self, data_path, split):
        """Yield ``(key, example)`` pairs for the recordings in the archive.

        Members that do not end in ``.wav`` (case-insensitive), or whose base
        name is not exactly three underscore-separated integers, are skipped.

        Args:
            data_path: Location of the downloaded zip archive; handed
                directly to ``zipfile.ZipFile``.
            split: Ignored. Every matching recording is yielded regardless of
                the requested split.

        Yields:
            Tuples ``(filename, example)`` where ``filename`` is the member's
            base name and ``example`` is a dict with the keys ``series``,
            ``label``, ``speaker_id``, ``utterance_id`` and ``filename``.
        """
        del split
        with zipfile.ZipFile(data_path) as archive:
            # Sorted so the generation order does not depend on the order in
            # which the archive happens to list its members.
            for name in sorted(archive.namelist()):
                if not name.lower().endswith(".wav"):
                    continue
                filename = name.rsplit("/", 1)[-1]
                stem = filename[:-4]  # drop the 4-character ".wav" suffix
                # Parse all fields inside the ``try``: a wrong number of fields
                # and a non-integer field both raise ValueError, and in either
                # case the member does not follow the naming scheme and is
                # skipped instead of aborting the whole generation.
                try:
                    digit_str, speaker_str, utterance_str = stem.split("_")
                    label = int(digit_str)
                    # Shift the speaker number from the file name down by one
                    # (presumably 1-based on disk, 0-based here).
                    speaker_id = int(speaker_str) - 1
                    utterance_id = int(utterance_str)
                except ValueError:
                    continue
                yield (
                    filename,
                    {
                        "series": wav_bytes_to_series(archive.read(name)),
                        "label": label,
                        "speaker_id": speaker_id,
                        "utterance_id": utterance_id,
                        "filename": filename,
                    },
                )