import os
import zipfile
from PIL import Image as PILImage
from stable_datasets.schema import ClassLabel, DatasetInfo, Features, Version
from stable_datasets.schema import Image as ImageFeature
from stable_datasets.utils import BaseDatasetBuilder
# Dataset builder for the Animals with Attributes 2 (AwA2) image dataset.
class AWA2(BaseDatasetBuilder):
    """Dataset builder for Animals with Attributes 2 (AwA2).

    The Animals with Attributes 2 (AwA2) dataset provides images across 50
    animal classes, useful for attribute-based classification and zero-shot
    learning research. See https://cvml.ista.ac.at/AwA2/ for more information.

    Only one asset (keyed ``"test"``) is declared, so every image in the
    archive is served through that single split.
    """

    VERSION = Version("1.0.0")

    # Single source-of-truth for dataset provenance + download locations.
    SOURCE = {
        "homepage": "https://cvml.ista.ac.at/AwA2/",
        "assets": {
            "test": "https://cvml.ista.ac.at/AwA2/AwA2-data.zip",
        },
        "citation": """@ARTICLE{8413121,
author={Xian, Yongqin and Lampert, Christoph H. and Schiele, Bernt and Akata, Zeynep},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
title={Zero-Shot Learning—A Comprehensive Evaluation of the Good, the Bad and the Ugly},
year={2019},
volume={41},
number={9},
pages={2251-2265},
keywords={Semantics;Visualization;Task analysis;Training;Fish;Protocols;Learning systems;Generalized zero-shot learning;transductive learning;image classification;weakly-supervised learning},
doi={10.1109/TPAMI.2018.2857768}}""",
    }

    def _info(self):
        """Describe the dataset: an ``image`` feature plus a 50-way ``label``.

        Returns:
            DatasetInfo: Metadata whose homepage and citation are read from
            ``SOURCE``. The order of the class names below defines the integer
            label assigned to each class.
        """
        return DatasetInfo(
            description="""The AWA2 dataset is an image classification dataset with images of 50 classes, primarily used in attribute-based image recognition research. See https://cvml.ista.ac.at/AwA2/ for more information.""",
            features=Features(
                {
                    "image": ImageFeature(),
                    "label": ClassLabel(
                        names=[
                            "antelope",
                            "grizzly+bear",
                            "killer+whale",
                            "beaver",
                            "dalmatian",
                            "persian+cat",
                            "horse",
                            "german+shepherd",
                            "blue+whale",
                            "siamese+cat",
                            "skunk",
                            "mole",
                            "tiger",
                            "hippopotamus",
                            "leopard",
                            "moose",
                            "spider+monkey",
                            "humpback+whale",
                            "elephant",
                            "gorilla",
                            "ox",
                            "fox",
                            "sheep",
                            "seal",
                            "chimpanzee",
                            "hamster",
                            "squirrel",
                            "rhinoceros",
                            "rabbit",
                            "bat",
                            "giraffe",
                            "wolf",
                            "chihuahua",
                            "rat",
                            "weasel",
                            "otter",
                            "buffalo",
                            "zebra",
                            "giant+panda",
                            "deer",
                            "bobcat",
                            "pig",
                            "lion",
                            "mouse",
                            "polar+bear",
                            "collie",
                            "walrus",
                            "raccoon",
                            "cow",
                            "dolphin",
                        ]
                    ),
                }
            ),
            supervised_keys=("image", "label"),
            homepage=self.SOURCE["homepage"],
            citation=self.SOURCE["citation"],
        )

    def _generate_examples(self, data_path, split):
        """Yield ``(key, example)`` pairs for every ``.jpg`` in the archive.

        Args:
            data_path: Location of the downloaded AwA2 zip archive; handed
                directly to ``zipfile.ZipFile``.
            split: Unused. AWA2 only contains a test split, so all images are
                yielded regardless of this value.

        Yields:
            tuple: ``(member_path, {"image": <RGB PIL image>, "label": <int>})``
            where ``member_path`` is the image's path inside the archive.
            Examples are grouped by class in ``DatasetInfo`` label order and,
            within a class, follow the archive's own member order.
        """
        # Use the class names from DatasetInfo for consistent label order.
        class_names = self.info.features["label"].names
        label_mapping = {name: idx for idx, name in enumerate(class_names)}

        # Zip member names always use "/" as separator, on every platform.
        root_dir = "Animals_with_Attributes2/JPEGImages/"

        with zipfile.ZipFile(data_path, "r") as z:
            # Bucket the members per class in a single pass over the archive
            # index instead of rescanning the full name list for every class.
            members_by_class = {name: [] for name in class_names}
            for image_path in z.namelist():
                if not image_path.startswith(root_dir):
                    continue
                if not image_path.endswith(".jpg"):
                    continue
                # Match on the exact folder name directly below the root so a
                # folder that merely shares a prefix with a class name is
                # never attributed to that class.
                folder = image_path[len(root_dir):].split("/", 1)[0]
                bucket = members_by_class.get(folder)
                if bucket is not None:
                    bucket.append(image_path)

            # Emit in label order so the output sequence stays stable.
            for class_name in class_names:
                label = label_mapping[class_name]
                for image_path in members_by_class[class_name]:
                    with z.open(image_path) as image_file:
                        # convert() forces a full decode, so the image stays
                        # usable after the archive member is closed.
                        image = PILImage.open(image_file).convert("RGB")
                    yield image_path, {"image": image, "label": label}