Source code for meerkat.datasets.ngoa
import json
import os
import subprocess
import meerkat as mk
from ..abstract import DatasetBuilder
from ..info import DatasetInfo
from ..registry import datasets
from ..utils import download_url, extract
# Git URL of the National Gallery of Art open-data repository; this is the
# remote that ``ngoa.download`` passes to ``git clone``.
REPO = "https://github.com/NationalGalleryOfArt/opendata.git"
@datasets.register()
class ngoa(DatasetBuilder):
    """Dataset builder for the National Gallery of Art open-data release.

    ``download`` clones the gallery's public git repository (``REPO``) into
    ``self.dataset_dir``. ``build`` then reads two CSV tables from the
    ``data`` sub-directory of that clone into DataPanels.
    """

    VERSIONS = ["main"]

    info = DatasetInfo(
        name="ngoa",
        full_name="National Gallery of Art Open Data",
        # flake8: noqa
        description="The dataset provides data records relating to the 130,000+ artworks in our collection and the artists who created them. You can download the dataset free of charge without seeking authorization from the National Gallery of Art.",
        homepage="https://github.com/NationalGalleryOfArt/opendata",
        tags=["art"],
        citation=None,
    )

    def build(self) -> dict:
        """Load the cloned CSV tables into a dictionary of DataPanels.

        Returns:
            dict: Two entries keyed ``"objects"`` and ``"published_images"``,
            holding the DataPanels read from ``objects.csv`` and
            ``published_images.csv`` respectively. The ``"published_images"``
            panel additionally receives an ``"image"`` column derived from
            its ``"iiifthumburl"`` column.
        """
        # Function-scope import kept from the original code.
        # NOTE(review): presumably this avoids an import cycle - confirm.
        from meerkat.columns.file_column import Downloader

        base_dir = os.path.join(self.dataset_dir, "data")

        db = {
            "objects": mk.DataPanel.from_csv(
                os.path.join(base_dir, "objects.csv"),
            ),
            "published_images": mk.DataPanel.from_csv(
                os.path.join(base_dir, "published_images.csv"),
            ),
        }

        # The "iiifthumburl" values are handed to the image column as its
        # "filepaths", with a Downloader as the loader.
        # NOTE(review): presumably the Downloader fetches each URL on access
        # and caches the result under cache_dir - confirm against Downloader.
        db["published_images"]["image"] = mk.ImageColumn.from_filepaths(
            db["published_images"]["iiifthumburl"],
            loader=Downloader(cache_dir=os.path.join(base_dir, "iiifthumburl")),
        )
        return db

    def download(self) -> None:
        """Clone the open-data repository into ``self.dataset_dir``.

        If ``self.dataset_dir`` already contains a ``.git`` directory the
        existing clone is reused and nothing is fetched.

        Raises:
            subprocess.CalledProcessError: If ``git clone`` exits with a
                non-zero status.
        """
        # git refuses to clone into a non-empty directory. The previous code
        # ignored that failure, so an existing clone was effectively reused;
        # keep that outcome explicitly rather than letting it raise below.
        if os.path.isdir(os.path.join(self.dataset_dir, ".git")):
            return

        # check_call (unlike call) raises when git fails, so a broken clone is
        # reported here instead of as a missing-CSV error later in build().
        subprocess.check_call(["git", "clone", REPO, self.dataset_dir])