# Source code for stable_datasets.images.small_norb

from zipfile import ZipFile

import datasets
import numpy as np
from PIL import Image

from stable_datasets.utils import BaseDatasetBuilder


def _read_binary_matrix_from_bytes(b: bytes):
    """Decode a NORB "binary matrix" blob into a NumPy array.

    The header is parsed as little-endian 32-bit integers: a magic number that
    selects the element type, the number of dimensions ``ndim``, and then the
    dimension sizes. At least three size slots are always present in the
    header, so for ``ndim < 3`` the unused trailing slots are skipped.
    Everything after the header is treated as element data.

    Args:
        b: Raw contents of one matrix file.

    Returns:
        An array with the shape given by the header. It is created with
        ``np.frombuffer`` and therefore views ``b`` rather than copying it.

    Raises:
        KeyError: If the magic number is not one of the supported element types.
        ValueError: If the amount of element data does not match the header.
    """
    # Byte order is spelled out so parsing does not depend on the host CPU.
    header_int = np.dtype("<i4")
    magic, ndim = (int(v) for v in np.frombuffer(b, dtype=header_int, count=2))

    # The header always reserves at least three dimension slots.
    eff_dim = max(3, ndim)
    raw_dims = np.frombuffer(b, dtype=header_int, count=eff_dim, offset=8)
    dims = tuple(int(raw_dims[i]) for i in range(ndim))

    dtype_map = {
        507333717: np.dtype("uint8"),  # 0x1E3D4C55: byte matrix (unsigned pixels)
        507333716: np.dtype("<i4"),  # 0x1E3D4C54: 32-bit integer matrix
        # NOTE: the NumPy alias "float" means float64, so single precision
        # must be requested explicitly as a 4-byte float.
        507333713: np.dtype("<f4"),  # 0x1E3D4C51: single-precision matrix
        507333715: np.dtype("<f8"),  # 0x1E3D4C53: double-precision matrix
    }
    dtype = dtype_map[magic]

    # Element data starts right after magic + ndim (8 bytes) and the size slots.
    data = np.frombuffer(b, dtype=dtype, offset=8 + eff_dim * 4)
    return data.reshape(dims)


def _load_small_norb_from_zip(zip_path: str):
    """Load the images and factor table of one smallNORB split from a zip file.

    The archive is searched for one member per suffix ``-dat.mat``,
    ``-cat.mat`` and ``-info.mat``; the first matching name is used for each.

    Args:
        zip_path: Path to the zip archive of a single split.

    Returns:
        A tuple ``(images_left, images_right, features)``:

        * ``images_left`` / ``images_right`` are index 0 and index 1 along the
          second axis of the ``-dat`` matrix.
          (assumes that axis is the stereo camera axis -- TODO confirm)
        * ``features`` is an int32 array whose first column comes from the
          ``-cat`` matrix and whose remaining columns come from the ``-info``
          matrix. Column 3 is halved with integer division.
          (presumably to turn the raw azimuth codes into consecutive
          indices -- verify against the dataset description)

    Raises:
        ValueError: If the archive lacks a member for any required suffix.
    """
    required_suffixes = ("-dat.mat", "-cat.mat", "-info.mat")

    with ZipFile(zip_path, "r") as zf:
        names = zf.namelist()
        members = {
            suffix: next((n for n in names if n.endswith(suffix)), None)
            for suffix in required_suffixes
        }

        # Fail with a clear message instead of leaking StopIteration, which a
        # generator caller would surface as an opaque RuntimeError.
        missing = [suffix for suffix, member in members.items() if member is None]
        if missing:
            raise ValueError(
                f"{zip_path!r} is missing member(s) ending in: {', '.join(missing)}"
            )

        dat_bytes, cat_bytes, info_bytes = (
            zf.read(members[suffix]) for suffix in required_suffixes
        )

    norb = _read_binary_matrix_from_bytes(dat_bytes)
    images_left = norb[:, 0]
    images_right = norb[:, 1]

    norb_class = _read_binary_matrix_from_bytes(cat_bytes)
    norb_info = _read_binary_matrix_from_bytes(info_bytes)

    # column_stack + astype produce a fresh, writable array, so the in-place
    # update below never touches the read-only buffers the matrices view.
    features = np.column_stack((norb_class, norb_info)).astype(np.int32)
    features[:, 3] //= 2

    return images_left, images_right, features



# [docs]
class SmallNORB(BaseDatasetBuilder):
    """SmallNORB dataset: 96x96 stereo images with 5 known factors.

    Every example carries a left and a right image plus the factor values,
    exposed both as one ``label`` sequence and as individual integer columns
    (``category``, ``instance``, ``elevation``, ``azimuth``, ``lighting``).
    """

    VERSION = datasets.Version("1.0.0")

    # Provenance of the data: homepage, per-split archive URLs, license, citation.
    SOURCE = {
        "homepage": "https://cs.nyu.edu/~ylclab/data/norb-v1.0-small/",
        "assets": {
            "train": "https://huggingface.co/datasets/randall-lab/small-norb/resolve/main/smallnorb-train.zip",
            "test": "https://huggingface.co/datasets/randall-lab/small-norb/resolve/main/smallnorb-test.zip",
        },
        "license": "Apache-2.0",
        "citation": """@inproceedings{lecun2004learning,
  title={Learning methods for generic object recognition with invariance to pose and lighting},
  author={LeCun, Yann and Huang, Fu Jie and Bottou, Leon},
  booktitle={Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004.},
  volume={2},
  pages={II--104},
  year={2004},
  organization={IEEE}
}""",
    }

    def _info(self):
        """Return the ``DatasetInfo`` (description, schema, provenance) for SmallNORB."""
        summary = (
            "SmallNORB dataset: stereo pair images of 3D toy objects, used for learning object recognition "
            "robust to pose and lighting. Each image pair corresponds to a combination of 5 factors: "
            "category, instance, elevation, azimuth, lighting."
        )

        # Two images, the full factor vector, then one int32 column per factor.
        schema = {
            "left_image": datasets.Image(),
            "right_image": datasets.Image(),
            "label": datasets.Sequence(datasets.Value("int32")),
        }
        for factor_name in ("category", "instance", "elevation", "azimuth", "lighting"):
            schema[factor_name] = datasets.Value("int32")

        provenance = self.SOURCE
        return datasets.DatasetInfo(
            description=summary,
            features=datasets.Features(schema),
            supervised_keys=("left_image", "label"),
            homepage=provenance["homepage"],
            license=provenance["license"],
            citation=provenance["citation"],
        )

    def _generate_examples(self, data_path, split):
        """Yield ``(index, example)`` pairs read from the archive at ``data_path``.

        Args:
            data_path: Location of the zip archive for one split; converted to
                ``str`` before loading.
            split: Not used inside this method.

        Yields:
            The running example index together with a dict holding both images
            as PIL images and the factor values.
        """
        left_stack, right_stack, factor_rows = _load_small_norb_from_zip(str(data_path))

        def as_grayscale(pixels):
            # Cast to uint8 and wrap the array as a single-channel ("L") image.
            return Image.fromarray(pixels.astype(np.uint8), mode="L")

        for idx, left_pixels in enumerate(left_stack):
            example = {
                "left_image": as_grayscale(left_pixels),
                "right_image": as_grayscale(right_stack[idx]),
            }

            # Plain Python ints: the whole row as "label", then one key per factor.
            factors = factor_rows[idx].tolist()
            example["label"] = factors
            example["category"] = factors[0]
            example["instance"] = factors[1]
            example["elevation"] = factors[2]
            example["azimuth"] = factors[3]
            example["lighting"] = factors[4]

            yield idx, example