Source code for meerkat.contrib.visual_genome
import os
from typing import Dict, Mapping
import ujson as json
import meerkat as mk
# TODO (Sabri): Add support for downloading the data.
# For the time being, the relevant files (attributes.json, image_data.json and
# relationships.json) must be downloaded manually into `dataset_dir`; see
# https://visualgenome.org/VGViz/explore
def build_visual_genome_dps(
    dataset_dir: str, write: bool = False
) -> Dict[str, mk.DataPanel]:
    """Build all Visual Genome DataPanels from the raw JSON files.

    Args:
        dataset_dir: Directory containing the Visual Genome JSON files.
        write: When True, the resulting DataPanels are also written back to
            ``dataset_dir`` through ``write_visual_genome_dps``.

    Returns:
        A single dictionary merging the outputs of the object/attribute,
        image and relationship builders (in that order).
    """
    print("Loading objects and attributes...")
    object_tables = _build_object_dps(dataset_dir)

    print("Loading images...")
    image_tables = _build_image_dp(dataset_dir=dataset_dir)

    print("Loading relationships...")
    relationship_tables = _build_relationships_dp(dataset_dir=dataset_dir)

    # Merge in build order so key insertion order matches the build sequence.
    dps = {**object_tables, **image_tables, **relationship_tables}

    if write:
        write_visual_genome_dps(dps, dataset_dir=dataset_dir)
    return dps
def read_visual_genome_dps(dataset_dir: str) -> Dict[str, mk.DataPanel]:
    """Read the previously written Visual Genome DataPanels from disk.

    Args:
        dataset_dir: Directory containing the ``<name>.mk`` DataPanels.

    Returns:
        Dictionary keyed by "attributes", "relationships", "objects" and
        "images", each mapped to the DataPanel read from
        ``<dataset_dir>/<key>.mk``.
    """
    loaded = {}
    for table_name in ("attributes", "relationships", "objects", "images"):
        table_path = os.path.join(dataset_dir, f"{table_name}.mk")
        loaded[table_name] = mk.DataPanel.read(table_path)
    return loaded
def write_visual_genome_dps(dps: Mapping[str, mk.DataPanel], dataset_dir: str):
    """Write each DataPanel to ``<dataset_dir>/<key>.mk``.

    Args:
        dps: Mapping from table name to the DataPanel to persist.
        dataset_dir: Directory the ``.mk`` outputs are written into.
    """
    for table_name, panel in dps.items():
        destination = os.path.join(dataset_dir, f"{table_name}.mk")
        panel.write(destination)
def _build_object_dps(dataset_dir: str):
    """Build the ``objects`` and ``attributes`` DataPanels.

    Reads ``attributes.json`` from ``dataset_dir``. Every object listed under
    an image's "attributes" entry becomes one row of the objects table
    (tagged with its ``image_id``, a single ``name`` and a single
    ``syn_name``); every (object, attribute) pair becomes one row of the
    attributes table.

    Args:
        dataset_dir: Directory containing ``attributes.json``.

    Returns:
        Dictionary with keys "objects" and "attributes", each an
        ``mk.DataPanel``.
    """
    # JSON is UTF-8 by specification; be explicit so decoding does not depend
    # on the platform's locale default.
    json_path = os.path.join(dataset_dir, "attributes.json")
    with open(json_path, encoding="utf-8") as f:
        images = json.load(f)

    object_rows = []  # one row per object
    attribute_rows = []  # one row per (object, attribute) pair
    for image in images:
        for obj in image["attributes"]:
            obj["image_id"] = image["image_id"]

            # all names are length 1
            obj["name"] = obj.pop("names")[0]

            # move the attributes out of the object row into their own table
            attributes = obj.pop("attributes", None)
            if attributes is not None:
                object_id = obj["object_id"]
                attribute_rows.extend(
                    {"object_id": object_id, "attribute": attribute}
                    for attribute in attributes
                )

            # the vast majority of objects (99.7%) have 0 or 1 synonym in their
            # synset, so we only consider the first synonym to keep things simple
            synset = obj.pop("synsets")
            obj["syn_name"] = synset[0] if synset else ""

            object_rows.append(obj)

    return {
        "objects": mk.DataPanel(object_rows),
        "attributes": mk.DataPanel(attribute_rows),
    }
def _build_image_dp(dataset_dir: str):
    """Build the ``images`` DataPanel from ``image_data.json``.

    The "coco_id" and "flickr_id" columns are dropped, a "local_path" column
    is derived from each image's "url", and an "image" column is created from
    those local paths.

    Args:
        dataset_dir: Directory containing ``image_data.json``; also used as
            the prefix of every local image path.

    Returns:
        Dictionary with the single key "images" mapped to the DataPanel.
    """
    # JSON is UTF-8 by specification; be explicit so decoding does not depend
    # on the platform's locale default.
    json_path = os.path.join(dataset_dir, "image_data.json")
    with open(json_path, encoding="utf-8") as f:
        images = json.load(f)

    image_dp = mk.DataPanel(images)
    for unused_column in ("coco_id", "flickr_id"):
        image_dp.remove_column(unused_column)

    # Keep only the part of each URL that follows "rak248" and prefix it with
    # dataset_dir. Plain concatenation is intentional here: presumably the
    # suffix already starts with "/" -- TODO confirm against the URL format.
    url_suffixes = image_dp["url"].str.split("rak248").apply(
        lambda parts: parts[-1]
    )
    image_dp["local_path"] = dataset_dir + url_suffixes

    image_dp["image"] = mk.ImageColumn(image_dp["local_path"])
    return {"images": image_dp}
def _build_relationships_dp(dataset_dir: str):
    """Build the ``relationships`` DataPanel from ``relationships.json``.

    Each relationship of each image becomes one row holding the image id, the
    predicate, and the object id / name / first synset of both the subject
    and the object of the relationship.

    Args:
        dataset_dir: Directory containing ``relationships.json``.

    Returns:
        Dictionary with the single key "relationships" mapped to the
        DataPanel.
    """

    def _entity_name(entity):
        # Entities carry either a single "name" or a list of "names".
        if "name" in entity:
            return entity["name"]
        return entity["names"][0]

    def _first_synset(synsets):
        # Only the first synset is kept; entities without one get "".
        return synsets[0] if synsets else ""

    # JSON is UTF-8 by specification; be explicit so decoding does not depend
    # on the platform's locale default.
    json_path = os.path.join(dataset_dir, "relationships.json")
    with open(json_path, encoding="utf-8") as f:
        images = json.load(f)

    rows = []
    for image in images:
        image_id = image["image_id"]
        for rel in image["relationships"]:
            obj = rel["object"]
            object_synsets = obj["synsets"]
            subject = rel["subject"]
            subject_synsets = subject["synsets"]
            rows.append(
                {
                    "image_id": image_id,
                    "predicate": rel["predicate"],
                    "subject_object_id": subject["object_id"],
                    "subject_name": _entity_name(subject),
                    "subject_syn": _first_synset(subject_synsets),
                    "object_object_id": obj["object_id"],
                    "object_name": _entity_name(obj),
                    "object_syn": _first_synset(object_synsets),
                }
            )

    return {"relationships": mk.DataPanel(rows)}