Source code for stable_datasets.images.beans

import zipfile

from stable_datasets.schema import ClassLabel, DatasetInfo, DatasetSource, DownloadInfo, Features, Version
from stable_datasets.schema import Image as ImageFeature
from stable_datasets.utils import BaseDatasetBuilder


class Beans(BaseDatasetBuilder):
    """Builder for the bean leaf disease image-classification dataset.

    Every example pairs a leaf image with one of three labels:
    ``healthy``, ``angular_leaf_spot`` or ``bean_rust``.
    """

    VERSION = Version("1.0.0")

    # Provenance and download locations live in this one place; ``_info``
    # reads the homepage and citation back from it.
    SOURCE = DatasetSource(
        homepage="https://github.com/AI-Lab-Makerere/ibean/",
        assets={
            "train": DownloadInfo(url="https://storage.googleapis.com/ibeans/train.zip"),
            "test": DownloadInfo(url="https://storage.googleapis.com/ibeans/test.zip"),
            "validation": DownloadInfo(url="https://storage.googleapis.com/ibeans/validation.zip"),
        },
        citation=(
            '@misc{makerere2020beans, author = "{Makerere AI Lab}", '
            'title = "{Bean Disease Dataset}", year = "2020", month = "January", '
            'url = "https://github.com/AI-Lab-Makerere/ibean/"}'
        ),
    )

    def _info(self):
        """Return the ``DatasetInfo`` describing features, license and provenance."""
        image_feature = ImageFeature()
        label_feature = ClassLabel(names=["healthy", "angular_leaf_spot", "bean_rust"])
        schema = Features({"image": image_feature, "label": label_feature})
        return DatasetInfo(
            description=(
                "The IBeans dataset contains leaf images representing three classes: "
                "1) Healthy leaves, 2) Angular Leaf Spot, and 3) Bean Rust. "
                "Images are collected in Uganda for disease classification in the field."
            ),
            features=schema,
            supervised_keys=("image", "label"),
            homepage=self.SOURCE["homepage"],
            license="MIT License",
            citation=self.SOURCE["citation"],
        )

    def _generate_examples(self, data_path, split):
        """Yield ``(key, example)`` pairs for each ``.jpg`` member of a zip archive.

        Args:
            data_path: Path of the zip archive to read; passed to
                ``zipfile.ZipFile``.
            split: Split identifier. It is accepted but not used in this
                method body.

        Yields:
            A tuple of the archive member name and a dict holding the raw
            image bytes under ``"image"`` and the integer class id under
            ``"label"``.
        """
        with zipfile.ZipFile(data_path, "r") as bundle:
            jpg_members = (member for member in bundle.namelist() if member.endswith(".jpg"))
            for member in jpg_members:
                raw_image = bundle.read(member)
                # The class name is the second "/"-separated component of the
                # member path; the first component is not inspected.
                class_name = member.split("/")[1]
                class_id = self.info.features["label"].str2int(class_name)
                yield member, {"image": raw_image, "label": class_id}