meerkat.datasets package

Subpackages

Submodules

meerkat.datasets.abstract module

class DatasetBuilder(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: ABC

abstract build()[source]
abstract download()[source]
download_url(url: str)[source]
dump_download_meta()[source]
is_downloaded() bool[source]

This is a very weak check for the existence of the dataset.

Subclasses should ideally implement more thorough checks.

REVISIONS: List[str]
info: DatasetInfo = None

meerkat.datasets.fsdd module

meerkat.datasets.info module

class DatasetInfo(name: str, full_name: str = None, description: str = None, citation: str = None, homepage: str = None, license: str = None, tags: str = None)[source]

Bases: object

citation: str = None
description: str = None
full_name: str = None
homepage: str = None
license: str = None
name: str
tags: str = None

meerkat.datasets.registry module

class Registry(name: str)[source]

Bases: Registry

Extension of fvcore’s registry that supports aliases.

get(name: str, **kwargs) Any[source]
get_obj(name: str) type[source]
register(obj: Optional[object] = None, aliases: Optional[Sequence[str]] = None) Optional[object][source]

Register the given object under the name obj.__name__. Can be used either as a decorator or as a plain function call. See docstring of this class for usage.

property catalog: DataPanel
property names: List[str]

meerkat.datasets.utils module

download_google_drive(url: Optional[str] = None, id: Optional[str] = None, dst: Optional[str] = None, is_folder: bool = False)[source]
download_url(url: str, dataset_dir: str, force: bool = False)[source]
extract(path: str, dst: str, extractor: Optional[str] = None)[source]

Module contents

class celeba(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['main']
info: DatasetInfo = DatasetInfo(name='celeba', full_name='CelebFaces Attributes', description='CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes dataset with more than 200K celebrity images, each with 40 attribute  annotations. The images in this dataset cover large pose variations and  background clutter.', citation=None, homepage='https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html', license=None, tags=['image', 'face recognition'])
class coco(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['2014']
info: DatasetInfo = DatasetInfo(name='coco', full_name='Common Objects in Context', description='Image data sets for object class recognition.', citation=None, homepage='https://cocodataset.org/#home', license=None, tags=['image', 'object recognition'])
class expw(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['main']
VERSION_TO_GDRIVE_ID = {'main': '19Eb_WiTsWelYv7Faff0L5Lmo1zv0vzwR'}
info: DatasetInfo = DatasetInfo(name='expw', full_name='Expression in-the-Wild', description='Imagenette is a subset of 10 easily classified classes from Imagenet (tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute).', citation=None, homepage='https://github.com/fastai/imagenette', license=None, tags=['image', 'classification'])
class fer(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['plus']
info: DatasetInfo = DatasetInfo(name='fer', full_name='Facial Expression Recognition Challenge', description='ImageNet is an image database organized according to the WordNet hierarchy (currently only the nouns), in which each node of the hierarchy is depicted by hundreds and thousands of images..', citation=None, homepage='https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data?select=icml_face_data.csv', license=None, tags=['image', 'facial emotion recognition'])
class imagenet(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['ilsvrc2012']
info: DatasetInfo = DatasetInfo(name='imagenet', full_name='ImageNet', description='ImageNet is an image database organized according to the WordNet hierarchy (currently only the nouns), in which each node of the hierarchy is depicted by hundreds and thousands of images..', citation='@inproceedings{imagenet_cvpr09,AUTHOR = {Deng, J. and Dong, W. and Socher, R. and Li, L.-J. and Li, K. and Fei-Fei, L.},TITLE = {{ImageNet: A Large-Scale Hierarchical Image Database}},BOOKTITLE = {CVPR09},YEAR = {2009},BIBSOURCE = "http://www.image-net.org/papers/imagenet_cvpr09.bib"}', homepage='https://www.image-net.org/', license=None, tags=['image', 'classification'])
class imagenette(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['full', '320px', '160px']
VERSION_TO_URL = {'160px': 'https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz', '320px': 'https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-320.tgz', 'full': 'https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz'}
property data_dir
info: DatasetInfo = DatasetInfo(name='imagenette', full_name='ImageNette', description='Imagenette is a subset of 10 easily classified classes from Imagenet (tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute).', citation=None, homepage='https://github.com/fastai/imagenette', license=None, tags=['image', 'classification'])
class mirflickr(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build() DataPanel[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['25k']
VERSION_TO_URLS = {'25k': ['http://press.liacs.nl/mirflickr/mirflickr25k.v3b/mirflickr25k.zip', 'http://press.liacs.nl/mirflickr/mirflickr25k.v3b/mirflickr25k_annotations_v080.zip']}
info: DatasetInfo = DatasetInfo(name='mirflickr', full_name='PASCAL', description='The MIRFLICKR-25000 open evaluation project consists of 25000 images downloaded from the social photography site Flickr through its public API  coupled with complete manual annotations, pre-computed descriptors and software for bag-of-words based similarity and classification and a matlab-like tool for exploring and classifying imagery.', citation="@inproceedings{huiskes08,    author = {Mark J. Huiskes and Michael S. Lew},    title = {The MIR Flickr Retrieval Evaluation},    booktitle = {MIR '08: Proceedings of the 2008 ACM International Conference on Multimedia Information Retrieval},    year = {2008},    location = {Vancouver, Canada},    publisher = {ACM},    address = {New York, NY, USA},}", homepage='https://press.liacs.nl/mirflickr/', license=None, tags=['image', 'retrieval'])
class ngoa(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

class Downloader(cache_dir: str, downloader: Optional[callable] = None)

Bases: object

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['main']
info: DatasetInfo = DatasetInfo(name='ngoa', full_name='National Gallery of Art Open Data', description='The dataset provides data records relating to the 130,000+ artworks in our collection and the artists who created them. You can download the dataset free of charge without seeking authorization from the National Gallery of Art.', citation=None, homepage='https://github.com/NationalGalleryOfArt/opendata', license=None, tags=['art'])
class pascal(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
REVISIONS: List[str]
VERSIONS = ['2012']
VERSION_TO_URL = {'2012': 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar'}
info: DatasetInfo = DatasetInfo(name='pascal', full_name='PASCAL', description='Image data sets for object class recognition.', citation='@Article{Everingham10,author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn,J. and Zisserman, A.",title = "The Pascal Visual Object Classes (VOC) Challenge",journal = "International Journal of Computer Vision",volume = "88",year = "2010",number = "2",month = jun,pages = "303--338",}', homepage='http://host.robots.ox.ac.uk/pascal/VOC/', license=None, tags=['image', 'object recognition'])
class rfw(dataset_dir: Optional[str] = None, version: Optional[str] = None, download_mode: str = 'reuse', **kwargs)[source]

Bases: DatasetBuilder

build()[source]
download()[source]
GROUPS = ['Caucasian', 'African', 'Asian', 'Indian']
REVISIONS: List[str]
VERSIONS = ['main']
info: DatasetInfo = DatasetInfo(name='fer', full_name='Racial Faces in-the-Wild', description='Racial Faces in-the-Wild (RFW) is a testing database for studying racial bias in face recognition. Four testing subsets, namely Caucasian, Asian, Indian and African, are constructed, and each contains about 3000 individuals with 6000 image pairs for face verification. They can be used to fairly evaluate and compare the recognition ability of the algorithm on different races.', citation=None, homepage='http://www.whdeng.cn/RFW/testing.html', license=None, tags=['image', 'facial recognition', 'algorithmic bias'])