Source code for meerkat.datasets.pascal

import os
import shutil

import meerkat as mk

from ..abstract import DatasetBuilder
from ..info import DatasetInfo
from ..registry import datasets
from ..utils import download_url, extract


@datasets.register()
class pascal(DatasetBuilder):
    """Dataset builder for the PASCAL VOC image data sets.

    The supported versions are listed in ``VERSIONS``; ``VERSION_TO_URL`` maps
    each of them to the archive fetched by :meth:`download`.
    """

    VERSIONS = ["2012"]

    VERSION_TO_URL = {
        # flake8: noqa
        "2012": "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
    }

    info = DatasetInfo(
        name="pascal",
        full_name="PASCAL",
        description="Image data sets for object class recognition.",
        homepage="http://host.robots.ox.ac.uk/pascal/VOC/",
        tags=["image", "object recognition"],
        citation=(
            "@Article{Everingham10,"
            'author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn,'
            'J. and Zisserman, A.",'
            'title = "The Pascal Visual Object Classes (VOC) Challenge",'
            'journal = "International Journal of Computer Vision",'
            'volume = "88",'
            'year = "2010",'
            'number = "2",'
            "month = jun,"
            'pages = "303--338",}'
        ),
    )

    def build(self):
        """Build the dataset for ``self.version``.

        Returns:
            The result of ``build_pascal_2012_dp`` called with
            ``self.dataset_dir``.

        Raises:
            ValueError: If ``self.version`` is not ``"2012"``.
        """
        if self.version == "2012":
            return build_pascal_2012_dp(dataset_dir=self.dataset_dir)
        # Name the offending version so the failure is diagnosable.
        raise ValueError(
            f"Unsupported version {self.version!r} for dataset 'pascal'; "
            f"supported versions: {self.VERSIONS}."
        )

    def download(self):
        """Download the archive for ``self.version`` and unpack it.

        The archive is extracted into ``<dataset_dir>/tmp`` and the contained
        ``VOCdevkit`` directory is then moved to ``<dataset_dir>/VOCdevkit``.

        Raises:
            KeyError: If ``self.version`` has no entry in ``VERSION_TO_URL``.
        """
        url = self.VERSION_TO_URL[self.version]
        # presumably download_url returns the local path of the fetched file
        # -- verify against ..utils.download_url
        downloaded_path = download_url(url, self.dataset_dir)
        extract(downloaded_path, os.path.join(self.dataset_dir, "tmp"))
        shutil.move(
            os.path.join(self.dataset_dir, "tmp/VOCdevkit"),
            os.path.join(self.dataset_dir, "VOCdevkit"),
        )
def build_pascal_2012_dp(dataset_dir: str):
    """Assemble a DataPanel for PASCAL VOC 2012 from an extracted devkit.

    Reads the ``train`` and ``val`` image-id lists under
    ``<dataset_dir>/VOCdevkit/VOC2012/ImageSets/Main`` and concatenates them.
    It then adds file-name and image columns, and merges in one label column
    per entry of ``PASCAL_CLASSES``.

    Args:
        dataset_dir: Directory that contains the ``VOCdevkit`` folder.

    Returns:
        A DataPanel with columns ``id``, ``split`` (``"train"`` or ``"val"``),
        ``file_name`` (``id`` + ``".jpg"``), ``image``, and one integer column
        per class that is 1 where the class file lists the value 1 for that
        id and 0 otherwise.
    """
    base_dir = os.path.join(dataset_dir, "VOCdevkit/VOC2012")

    # Load the id list of each split and tag every row with its split name.
    split_dps = []
    for split in ("train", "val"):
        split_dp = mk.DataPanel.from_csv(
            os.path.join(base_dir, f"ImageSets/Main/{split}.txt"),
            header=None,
            names=["id"],
        )
        split_dp["split"] = [split] * len(split_dp)
        split_dps.append(split_dp)
    dp = mk.concat(split_dps, axis=0)

    # create filename column
    dp["file_name"] = dp["id"] + ".jpg"
    dp["image"] = mk.ImageColumn.from_filepaths(
        dp["file_name"], base_dir=os.path.join(base_dir, "JPEGImages")
    )

    for class_name in PASCAL_CLASSES:
        # NOTE(review): the multi-character delimiter is presumably handed to
        # pandas.read_csv, which would treat it as a regex (hence
        # engine="python") -- confirm that " ?" matches the separators
        # actually used in the *_trainval.txt files.
        label_dp = mk.DataPanel.from_csv(
            os.path.join(base_dir, f"ImageSets/Main/{class_name}_trainval.txt"),
            header=None,
            delimiter=" ?",
            names=["id", class_name],
            engine="python",
        )
        # Collapse the raw label to a binary indicator: 1 stays 1, every
        # other value becomes 0.
        label_dp[class_name] = (label_dp[class_name] == 1).astype(int)
        dp = dp.merge(label_dp, on="id", validate="one_to_one")

    return dp
# Object class names; each one names an ImageSets/Main/<class>_trainval.txt
# file and becomes a label column in build_pascal_2012_dp.
PASCAL_CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]