Source code for stable_datasets.images.small_norb

from zipfile import ZipFile

import datasets
import numpy as np
from PIL import Image

from stable_datasets.utils import BaseDatasetBuilder


def _read_binary_matrix_from_bytes(b: bytes):
    """Decode a NORB "binary matrix" blob into a NumPy array.

    Layout consumed by this function:

    * bytes 0-3: int32 magic number selecting the element type,
    * bytes 4-7: int32 number of dimensions (``ndim``),
    * next ``max(3, ndim)`` int32 values: dimension slots (only the first
      ``ndim`` are used; the header always reserves at least three),
    * everything after the header: the matrix elements.

    Args:
        b: Raw contents of a ``.mat`` file in NORB binary-matrix format.

    Returns:
        An array of shape ``dims[:ndim]`` created with ``np.frombuffer``
        over ``b`` (no copy is made here).

    Raises:
        KeyError: If the magic number is not one of the supported types.
        ValueError: If the payload size does not match the declared shape
            (raised by NumPy).
    """
    # The magic constants below only match when the header is read as
    # little-endian, so use explicit little-endian dtypes instead of relying
    # on the host byte order.
    magic, ndim = (int(v) for v in np.frombuffer(b, dtype="<i4", count=2))

    # The header reserves at least three dimension slots, even for 1-D/2-D data.
    eff_dim = max(3, ndim)
    raw_dims = np.frombuffer(b, dtype="<i4", count=eff_dim, offset=8)
    dims = tuple(int(d) for d in raw_dims[:ndim])

    dtype_by_magic = {
        # Byte matrix; kept as int8 so the returned dtype is unchanged for
        # callers (SmallNORB re-casts image bytes to uint8).
        507333717: "int8",
        507333716: "<i4",  # 32-bit integer matrix
        # Single precision: must be a 4-byte float. NumPy's "float" alias is
        # float64, which would mis-read the payload.
        507333713: "<f4",
        507333715: "<f8",  # double precision
    }
    try:
        dtype = dtype_by_magic[magic]
    except KeyError:
        raise KeyError(f"unsupported NORB binary-matrix magic number: {magic:#x}") from None

    data = np.frombuffer(b, dtype=dtype, offset=8 + eff_dim * 4)
    return data.reshape(dims)


def _load_small_norb_from_zip(zip_path: str):
    """Load stereo images and factor labels from a SmallNORB zip archive.

    The archive must contain one member ending in each of ``-dat.mat``
    (images), ``-cat.mat`` (category labels) and ``-info.mat`` (remaining
    factors). When several members share a suffix, the first one listed in
    the archive is used.

    Args:
        zip_path: Path to the zip archive.

    Returns:
        A tuple ``(images_left, images_right, features)`` where the two image
        arrays are index 0 and index 1 along the second axis of the image
        matrix, and ``features`` is an int32 array whose first column is the
        category followed by the columns of the info matrix.

    Raises:
        FileNotFoundError: If any of the three required members is missing.
    """
    required_suffixes = ("-dat.mat", "-cat.mat", "-info.mat")

    with ZipFile(zip_path, "r") as zf:
        member_names = zf.namelist()
        payloads = []
        for suffix in required_suffixes:
            member = next((n for n in member_names if n.endswith(suffix)), None)
            if member is None:
                # A bare next() would raise StopIteration here, which turns
                # into an opaque RuntimeError when this function is called
                # from inside a generator (PEP 479).
                raise FileNotFoundError(
                    f"no member ending in {suffix!r} found in archive {zip_path!r}"
                )
            payloads.append(zf.read(member))

    dat_bytes, cat_bytes, info_bytes = payloads

    norb = _read_binary_matrix_from_bytes(dat_bytes)
    images_left = norb[:, 0]
    images_right = norb[:, 1]

    norb_class = _read_binary_matrix_from_bytes(cat_bytes)
    norb_info = _read_binary_matrix_from_bytes(info_bytes)

    # Column 0 is the category; the info columns follow it.
    features = np.column_stack((norb_class, norb_info)).astype(np.int32)
    # Column 3 (exposed as "azimuth" by SmallNORB) is halved with floor
    # division -- presumably because raw values are even; confirm against the
    # dataset documentation.
    features[:, 3] = (features[:, 3] // 2).astype(np.int32)

    return images_left, images_right, features


class SmallNORB(BaseDatasetBuilder):
    """SmallNORB dataset: 96x96 stereo images with 5 known factors.

    Each example carries a left/right image pair plus the five factors
    (category, instance, elevation, azimuth, lighting), both as individual
    fields and combined in the ``label`` sequence.
    """

    VERSION = datasets.Version("1.0.0")

    # Provenance metadata and per-split download locations.
    SOURCE = {
        "homepage": "https://cs.nyu.edu/~ylclab/data/norb-v1.0-small/",
        "assets": {
            "train": "https://huggingface.co/datasets/randall-lab/small-norb/resolve/main/smallnorb-train.zip",
            "test": "https://huggingface.co/datasets/randall-lab/small-norb/resolve/main/smallnorb-test.zip",
        },
        "license": "Apache-2.0",
        "citation": """@inproceedings{lecun2004learning, title={Learning methods for generic object recognition with invariance to pose and lighting}, author={LeCun, Yann and Huang, Fu Jie and Bottou, Leon}, booktitle={Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004.}, volume={2}, pages={II--104}, year={2004}, organization={IEEE} }""",
    }

    def _info(self):
        """Describe the dataset schema and attach the metadata from ``SOURCE``."""
        int32 = "int32"
        schema = datasets.Features(
            {
                "left_image": datasets.Image(),
                "right_image": datasets.Image(),
                "label": datasets.Sequence(datasets.Value(int32)),
                "category": datasets.Value(int32),
                "instance": datasets.Value(int32),
                "elevation": datasets.Value(int32),
                "azimuth": datasets.Value(int32),
                "lighting": datasets.Value(int32),
            }
        )
        source = self.SOURCE
        return datasets.DatasetInfo(
            description=(
                "SmallNORB dataset: stereo pair images of 3D toy objects, used for learning object recognition "
                "robust to pose and lighting. Each image pair corresponds to a combination of 5 factors: "
                "category, instance, elevation, azimuth, lighting."
            ),
            features=schema,
            supervised_keys=("left_image", "label"),
            homepage=source["homepage"],
            license=source["license"],
            citation=source["citation"],
        )

    def _generate_examples(self, data_path, split):
        """Yield ``(index, example)`` pairs for the archive at ``data_path``.

        Args:
            data_path: Location of the split's zip archive; converted with
                ``str()`` before loading.
            split: Not used by this method.

        Yields:
            Tuples of the running example index and a dict holding both
            images, the full factor list under ``"label"`` and each factor
            under its own key.
        """
        left_stack, right_stack, factor_table = _load_small_norb_from_zip(str(data_path))

        for idx, left_pixels in enumerate(left_stack):
            # Cast to uint8 before handing the pixels to PIL as 8-bit grayscale.
            left_img = Image.fromarray(left_pixels.astype(np.uint8), mode="L")
            right_img = Image.fromarray(right_stack[idx].astype(np.uint8), mode="L")

            factors = factor_table[idx].tolist()
            example = {
                "left_image": left_img,
                "right_image": right_img,
                "label": factors,
                "category": factors[0],
                "instance": factors[1],
                "elevation": factors[2],
                "azimuth": factors[3],
                "lighting": factors[4],
            }
            yield idx, example