meerkat package#

Meerkat.

class AbstractCell(*args, **kwargs)[source]#

Bases: ABC

get(*args, **kwargs) object[source]#

Get me the thing that this cell exists for.

loader(*args, **kwargs) object[source]#
property metadata: dict#

Get the metadata associated with this cell.

class AbstractColumn(data: Sequence | None = None, collate_fn: Callable | None = None, formatter: Callable | None = None, *args, **kwargs)[source]#

Bases: BlockableMixin, CloneableMixin, CollateMixin, ColumnIOMixin, FunctionInspectorMixin, LambdaMixin, MappableMixin, MaterializationMixin, ProvenanceMixin, ABC

An abstract class for Meerkat columns.

append(column: AbstractColumn) None[source]#
batch(batch_size: int = 1, drop_last_batch: bool = False, collate: bool = True, num_workers: int = 0, materialize: bool = True, *args, **kwargs)[source]#

Batch the column.

Parameters:
  • batch_size – integer batch size

  • drop_last_batch – drop the last batch if it's smaller than batch_size

  • collate – whether to collate the returned batches

Returns:

batches of data

static concat(columns: Sequence[AbstractColumn]) None[source]#
filter(function: Callable, with_indices=False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, drop_last_batch: bool = False, num_workers: int | None = 0, materialize: bool = True, pbar: bool = False, **kwargs) AbstractColumn | None[source]#

Filter the elements of the column using a function.

classmethod from_data(data: Columnable | AbstractColumn)[source]#

Convert data to a meerkat column using the appropriate Column type.

full_length()[source]#
classmethod get_writer(mmap: bool = False, template: AbstractColumn | None = None)[source]#
head(n: int = 5) AbstractColumn[source]#

Get the first n examples of the column.

is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

streamlit()[source]#
tail(n: int = 5) AbstractColumn[source]#

Get the last n examples of the column.

to_pandas() Series[source]#
Columnable#

alias of Union[Sequence, ndarray, Series, Tensor]

property data#

Get the underlying data.

property formatter: Callable#
property is_mmap#
logdir: Path = PosixPath('/home/docs/meerkat')#
property metadata#
class ArrowArrayColumn(data: Sequence, *args, **kwargs)[source]#

Bases: AbstractColumn

block_class#

alias of ArrowBlock

classmethod concat(columns: Sequence[ArrowArrayColumn])[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

to_numpy()[source]#
to_pandas()[source]#
to_tensor()[source]#
class AudioColumn(data: Sequence[str] | None = None, transform: callable | None = None, loader: callable | None = None, base_dir: str | None = None, *args, **kwargs)[source]#

Bases: FileColumn

A lambda column where each cell represents an audio file on disk. The underlying data is a PandasSeriesColumn of strings, where each string is the path to an audio file. The column materializes the audio files into memory when indexed. If the column is lazy indexed with the lz indexer, the audio files are not materialized and a FileCell or an AudioColumn is returned instead.

Parameters:
  • data (Sequence[str]) – A list of filepaths to audio files.

  • transform (callable) –

    A function that transforms the image (e.g. torchvision.transforms.functional.center_crop).

    Warning

    In order for the column to be serializable, the transform function must be pickleable.

  • loader (callable) –

    A callable with signature def loader(filepath: str) -> PIL.Image:. Defaults to torchvision.datasets.folder.default_loader.

    Warning

    In order for the column to be serializable with write(), the loader function must be pickleable.

  • base_dir (str) – A base directory that the paths in data are relative to. If None, the paths are assumed to be absolute.

collate(batch)[source]#

Collate data.

classmethod default_loader(*args, **kwargs)[source]#
class CellColumn(cells: Sequence[AbstractCell] | None = None, *args, **kwargs)[source]#

Bases: AbstractColumn

static concat(columns: Sequence[CellColumn])[source]#
classmethod from_cells(cells: Sequence[AbstractCell], *args, **kwargs)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

property cells#
class DataPanel(data: dict | list | Dataset | None = None, *args, **kwargs)[source]#

Bases: CloneableMixin, FunctionInspectorMixin, LambdaMixin, MappableMixin, MaterializationMixin, ProvenanceMixin

Meerkat DataPanel class.

add_column(name: str, data: Sequence | ndarray | Series | Tensor, overwrite=False) None[source]#

Add a column to the DataPanel.

append(dp: DataPanel, axis: str | int = 'rows', suffixes: Tuple[str] = None, overwrite: bool = False) DataPanel[source]#

Append a batch of data to the dataset.

dp must have the same columns as the dataset (regardless of what columns are visible).

batch(batch_size: int = 1, drop_last_batch: bool = False, num_workers: int = 0, materialize: bool = True, shuffle: bool = False, *args, **kwargs)[source]#

Batch the dataset.

Parameters:
  • batch_size – integer batch size

  • drop_last_batch – drop the last batch if it's smaller than batch_size

Returns:

batches of data

consolidate()[source]#
filter(function: Callable | None = None, with_indices=False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, drop_last_batch: bool = False, num_workers: int = 0, materialize: bool = True, pbar: bool = False, **kwargs) DataPanel | None[source]#

Filter operation on the DataPanel.

classmethod from_arrow(table: Table)[source]#

Create a DataPanel from an Arrow Table.

classmethod from_batch(batch: Dict[str, List | AbstractColumn]) DataPanel[source]#

Convert a batch to a Dataset.

classmethod from_batches(batches: Sequence[Dict[str, List | AbstractColumn]]) DataPanel[source]#

Convert a list of batches to a dataset.

classmethod from_csv(filepath: str, *args, **kwargs)[source]#

Create a Dataset from a csv file.

Parameters:
  • filepath (str) – The file path or buffer to load from. Same as pandas.read_csv().

  • *args – Argument list for pandas.read_csv().

  • **kwargs – Keyword arguments for pandas.read_csv().

Returns:

The constructed datapanel.

Return type:

DataPanel

classmethod from_dict(d: Dict) DataPanel[source]#

Convert a dictionary to a dataset.

Alias for DataPanel.from_batch(...).

classmethod from_feather(path: str)[source]#

Create a Dataset from a feather file.

classmethod from_huggingface(*args, **kwargs)[source]#

Load a Huggingface dataset as a DataPanel.

Use this to replace datasets.load_dataset, so

>>> dict_of_datasets = datasets.load_dataset('boolq')

becomes

>>> dict_of_datapanels = DataPanel.from_huggingface('boolq')
classmethod from_jsonl(json_path: str) DataPanel[source]#

Load a dataset from a .jsonl file on disk, where each line of the json file consists of a single example.

classmethod from_pandas(df: DataFrame)[source]#

Create a Dataset from a pandas DataFrame.

head(n: int = 5) DataPanel[source]#

Get the first n examples of the DataPanel.

items()[source]#
keys()[source]#
map(function: Callable | None = None, with_indices: bool = False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, drop_last_batch: bool = False, num_workers: int = 0, output_type: type | Dict[str, type] = None, mmap: bool = False, mmap_path: str = None, materialize: bool = True, pbar: bool = False, **kwargs) Dict | List | AbstractColumn | None[source]#
merge(right: DataPanel, how: str = 'inner', on: str | List[str] | None = None, left_on: str | List[str] | None = None, right_on: str | List[str] | None = None, sort: bool = False, suffixes: Sequence[str] = ('_x', '_y'), validate=None)[source]#
classmethod read(path: str, *args, **kwargs) DataPanel[source]#

Load a DataPanel stored on disk.

remove_column(column: str) None[source]#

Remove a column from the dataset.

streamlit()[source]#
tail(n: int = 5) DataPanel[source]#

Get the last n examples of the DataPanel.

to_jsonl(path: str) None[source]#

Save a Dataset to a jsonl file.

to_pandas() DataFrame[source]#

Convert a Dataset to a pandas DataFrame.

update(function: Callable | None = None, with_indices: bool = False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, remove_columns: List[str] | None = None, num_workers: int = 0, output_type: type | Dict[str, type] = None, mmap: bool = False, mmap_path: str = None, materialize: bool = True, pbar: bool = False, **kwargs) DataPanel[source]#

Update the columns of the dataset.

values()[source]#
write(path: str) None[source]#

Save a DataPanel to disk.

property columns#

Column names in the DataPanel.

property data: BlockManager#

Get the underlying data (excluding invisible rows).

To access underlying data with invisible rows, use _data.

logdir: Path = PosixPath('/home/docs/meerkat')#
property ncols#

Number of columns in the DataPanel.

property nrows#

Number of rows in the DataPanel.

property shape#

Shape of the DataPanel (num_rows, num_columns).

class FileCell(transform: callable | None = None, loader: callable | None = None, data: str | None = None, base_dir: str | None = None)[source]#

Bases: FileLoaderMixin, LambdaCell

property absolute_path#
class FileColumn(data: Sequence[str] | None = None, transform: callable | None = None, loader: callable | None = None, base_dir: str | None = None, *args, **kwargs)[source]#

Bases: FileLoaderMixin, LambdaColumn

A column where each cell represents a file stored on disk or the web. The underlying data is a PandasSeriesColumn of strings, where each string is the path to a file. The column materializes the files into memory when indexed. If the column is lazy indexed with the lz indexer, the files are not materialized and a FileCell or a FileColumn is returned instead.

Parameters:
  • data (Sequence[str]) – A list of filepaths to files.

  • transform (callable) –

    A function that transforms the image (e.g. torchvision.transforms.functional.center_crop).

    Warning

    In order for the column to be serializable, the transform function must be pickleable.

  • loader (callable) –

    A callable with signature def loader(filepath: str) -> PIL.Image:. Defaults to torchvision.datasets.folder.default_loader.

    Warning

    In order for the column to be serializable with write(), the loader function must be pickleable.

  • base_dir (str) – A base directory that the paths in data are relative to. If None, the paths are assumed to be absolute.

classmethod default_loader(*args, **kwargs)[source]#
classmethod from_filepaths(filepaths: Sequence[str], loader: callable | None = None, transform: callable | None = None, base_dir: str | None = None, *args, **kwargs)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

class ImageColumn(data: Sequence[str] | None = None, transform: callable | None = None, loader: callable | None = None, base_dir: str | None = None, *args, **kwargs)[source]#

Bases: FileColumn

A column where each cell represents an image stored on disk. The underlying data is a PandasSeriesColumn of strings, where each string is the path to an image. The column materializes the images into memory when indexed. If the column is lazy indexed with the lz indexer, the images are not materialized and an ImageCell or an ImageColumn is returned instead.

Parameters:
  • data (Sequence[str]) – A list of filepaths to images.

  • transform (callable) –

    A function that transforms the image (e.g. torchvision.transforms.functional.center_crop).

    Warning

    In order for the column to be serializable, the transform function must be pickleable.

  • loader (callable) –

    A callable with signature def loader(filepath: str) -> PIL.Image:. Defaults to torchvision.datasets.folder.default_loader.

    Warning

    In order for the column to be serializable with write(), the loader function must be pickleable.

  • base_dir (str) – A base directory that the paths in data are relative to. If None, the paths are assumed to be absolute.

classmethod default_loader(*args, **kwargs)[source]#
class LambdaCell(fn: callable | None = None, data: any | None = None)[source]#

Bases: AbstractCell

get(*args, **kwargs)[source]#

Get me the thing that this cell exists for.

property data: object#

Get the data associated with this cell.

class LambdaColumn(data: DataPanel | AbstractColumn, fn: callable | None = None, output_type: type | None = None, *args, **kwargs)[source]#

Bases: AbstractColumn

static concat(columns: Sequence[LambdaColumn])[source]#
fn(data: object)[source]#

Subclasses like ImageColumn should be able to implement their own version.

is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

class ListColumn(data: Sequence | None = None, *args, **kwargs)[source]#

Bases: AbstractColumn

batch(batch_size: int = 1, drop_last_batch: bool = False, collate: bool = True, *args, **kwargs)[source]#

Batch the column.

Parameters:
  • batch_size – integer batch size

  • drop_last_batch – drop the last batch if it's smaller than batch_size

  • collate – whether to collate the returned batches

Returns:

batches of data

classmethod concat(columns: Sequence[ListColumn])[source]#
default_formatter()#
classmethod from_list(data: Sequence)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

class MedicalVolumeCell(paths: str | Path | PathLike | Sequence[str | Path | PathLike], loader: Callable | None = None, transform: Callable | None = None, cache_metadata: bool = False, *args, **kwargs)[source]#

Bases: PathsMixin, AbstractCell

Interface for loading medical volume data.

Examples

>>> # Specify xray dicoms with default orientation ("SI", "AP"):
>>> cell = MedicalVolumeCell("/path/to/xray.dcm", loader=DicomReader(group_by=None, default_ornt=("SI", "AP")))

>>> # Load multi-echo MRI volumes
>>> cell = MedicalVolumeCell("/path/to/mri/scan/dir", loader=DicomReader(group_by="EchoNumbers"))

clear_metadata()[source]#
classmethod default_loader(paths: Sequence[Path], *args, **kwargs)[source]#
classmethod from_state(state, *args, **kwargs)[source]#
get(*args, cache_metadata: bool | None = None, **kwargs)[source]#

Get me the thing that this cell exists for.

get_metadata(ignore_bytes: bool = False, readable: bool = False, as_raw_type: bool = False, force_load: bool = False) Dict[source]#
get_state()[source]#
class MedicalVolumeColumn(*args, **kwargs)[source]#

Bases: CellColumn

classmethod from_filepaths(filepaths: Sequence[str] | None = None, loader: callable | None = None, transform: callable | None = None, *args, **kwargs)[source]#
class NumpyArrayColumn(data: Sequence, *args, **kwargs)[source]#

Bases: AbstractColumn, NDArrayOperatorsMixin

block_class#

alias of NumpyBlock

classmethod concat(columns: Sequence[NumpyArrayColumn])[source]#
classmethod from_array(data: ndarray, *args, **kwargs)[source]#
classmethod from_npy(path, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')[source]#
classmethod get_writer(mmap: bool = False, template: AbstractColumn | None = None)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

to_pandas() Series[source]#
to_tensor() Tensor[source]#

Use column.to_tensor() instead of torch.tensor(column), which is very slow.

property is_mmap#
class PandasSeriesColumn(data: Sequence | None = None, collate_fn: Callable | None = None, formatter: Callable | None = None, *args, **kwargs)[source]#

Bases: AbstractColumn, NDArrayOperatorsMixin

block_class#

alias of PandasBlock

cat#

alias of _MeerkatCategoricalAccessor

dt#

alias of _MeerkatCombinedDatetimelikeProperties

str#

alias of _MeerkatStringMethods

classmethod concat(columns: Sequence[PandasSeriesColumn])[source]#
classmethod from_array(data: ndarray, *args, **kwargs)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

to_pandas() Series[source]#
to_tensor() Tensor[source]#

Use column.to_tensor() instead of torch.tensor(column), which is very slow.

class SpacyColumn(data: Sequence[spacy_tokens.Doc] = None, *args, **kwargs)[source]#

Bases: ListColumn

classmethod from_docs(data: Sequence[spacy_tokens.Doc], *args, **kwargs)[source]#
classmethod from_texts(texts: Sequence[str], lang: str = 'en_core_web_sm', *args, **kwargs)[source]#
classmethod read(path: str, nlp: spacy.language.Language = None, lang: str = None, *args, **kwargs) SpacyColumn[source]#
write(path: str, **kwargs) None[source]#
property docs#
property tokens#
class TensorColumn(data: Sequence | None = None, *args, **kwargs)[source]#

Bases: NDArrayOperatorsMixin, AbstractColumn

block_class#

alias of TensorBlock

classmethod concat(columns: Sequence[TensorColumn])[source]#
classmethod from_data(data: Sequence | ndarray | Series | Tensor | AbstractColumn)[source]#

Convert data to a TensorColumn.

classmethod get_writer(mmap: bool = False, template: AbstractColumn | None = None)[source]#
is_equal(other: AbstractColumn) bool[source]#

Tests whether two columns are equal.

Parameters:

other (AbstractColumn) – the column to compare against.

to_pandas() Series[source]#
to_tensor() Tensor[source]#
class VideoColumn(*args, **kwargs)[source]#

Bases: CellColumn

Interface for creating a CellColumn from VideoCell objects.

classmethod from_filepaths(filepaths: Sequence[str] | None = None, time_dim: int | None = 1, transform: Callable | None = None, *args, **kwargs)[source]#
class provenance(enabled: bool = True)[source]#

Bases: object

concat(objs: Sequence[DataPanel] | Sequence[AbstractColumn], axis: str | int = 'rows', suffixes: Tuple[str] = None, overwrite: bool = False) DataPanel | AbstractColumn[source]#

Concatenate a sequence of columns or a sequence of DataPanels. If the sequence is empty, returns an empty DataPanel.

  • If concatenating columns, all columns must be of the same type (e.g. all ListColumn).

  • If concatenating DataPanels along axis 0 (rows), all DataPanels must have the same set of columns.

  • If concatenating DataPanels along axis 1 (columns), all DataPanels must have the same length and cannot have any of the same column names.

Parameters:
  • objs (Union[Sequence[DataPanel], Sequence[AbstractColumn]]) – sequence of columns or DataPanels.

  • axis (Union[str, int]) – The axis along which to concatenate. Ignored if concatenating columns.

Returns:

concatenated DataPanel or column

Return type:

Union[DataPanel, AbstractColumn]

merge(left: DataPanel, right: DataPanel, how: str = 'inner', on: str | List[str] = None, left_on: str | List[str] = None, right_on: str | List[str] = None, sort: bool = False, suffixes: Sequence[str] = ('_x', '_y'), validate=None)[source]#

Subpackages#

Submodules#

meerkat.config module#

class ContribOptions[source]#

Bases: object

download_dir: str = '/home/docs/.meerkat/datasets'#
class DisplayOptions[source]#

Bases: object

max_image_height: int = 128#
max_image_width: int = 128#
max_rows: int = 10#
show_audio: bool = True#
show_images: bool = True#

meerkat.datapanel module#

DataPanel class.

class DataPanel(data: dict | list | Dataset | None = None, *args, **kwargs)[source]#

Bases: CloneableMixin, FunctionInspectorMixin, LambdaMixin, MappableMixin, MaterializationMixin, ProvenanceMixin

Meerkat DataPanel class.

add_column(name: str, data: Sequence | ndarray | Series | Tensor, overwrite=False) None[source]#

Add a column to the DataPanel.

append(dp: DataPanel, axis: str | int = 'rows', suffixes: Tuple[str] = None, overwrite: bool = False) DataPanel[source]#

Append a batch of data to the dataset.

dp must have the same columns as the dataset (regardless of what columns are visible).

batch(batch_size: int = 1, drop_last_batch: bool = False, num_workers: int = 0, materialize: bool = True, shuffle: bool = False, *args, **kwargs)[source]#

Batch the dataset.

Parameters:
  • batch_size – integer batch size

  • drop_last_batch – drop the last batch if it's smaller than batch_size

Returns:

batches of data

consolidate()[source]#
filter(function: Callable | None = None, with_indices=False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, drop_last_batch: bool = False, num_workers: int = 0, materialize: bool = True, pbar: bool = False, **kwargs) DataPanel | None[source]#

Filter operation on the DataPanel.

classmethod from_arrow(table: Table)[source]#

Create a DataPanel from an Arrow Table.

classmethod from_batch(batch: Dict[str, List | AbstractColumn]) DataPanel[source]#

Convert a batch to a Dataset.

classmethod from_batches(batches: Sequence[Dict[str, List | AbstractColumn]]) DataPanel[source]#

Convert a list of batches to a dataset.

classmethod from_csv(filepath: str, *args, **kwargs)[source]#

Create a Dataset from a csv file.

Parameters:
  • filepath (str) – The file path or buffer to load from. Same as pandas.read_csv().

  • *args – Argument list for pandas.read_csv().

  • **kwargs – Keyword arguments for pandas.read_csv().

Returns:

The constructed datapanel.

Return type:

DataPanel

classmethod from_dict(d: Dict) DataPanel[source]#

Convert a dictionary to a dataset.

Alias for DataPanel.from_batch(...).

classmethod from_feather(path: str)[source]#

Create a Dataset from a feather file.

classmethod from_huggingface(*args, **kwargs)[source]#

Load a Huggingface dataset as a DataPanel.

Use this to replace datasets.load_dataset, so

>>> dict_of_datasets = datasets.load_dataset('boolq')

becomes

>>> dict_of_datapanels = DataPanel.from_huggingface('boolq')
classmethod from_jsonl(json_path: str) DataPanel[source]#

Load a dataset from a .jsonl file on disk, where each line of the json file consists of a single example.

classmethod from_pandas(df: DataFrame)[source]#

Create a Dataset from a pandas DataFrame.

head(n: int = 5) DataPanel[source]#

Get the first n examples of the DataPanel.

items()[source]#
keys()[source]#
map(function: Callable | None = None, with_indices: bool = False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, drop_last_batch: bool = False, num_workers: int = 0, output_type: type | Dict[str, type] = None, mmap: bool = False, mmap_path: str = None, materialize: bool = True, pbar: bool = False, **kwargs) Dict | List | AbstractColumn | None[source]#
merge(right: DataPanel, how: str = 'inner', on: str | List[str] | None = None, left_on: str | List[str] | None = None, right_on: str | List[str] | None = None, sort: bool = False, suffixes: Sequence[str] = ('_x', '_y'), validate=None)[source]#
classmethod read(path: str, *args, **kwargs) DataPanel[source]#

Load a DataPanel stored on disk.

remove_column(column: str) None[source]#

Remove a column from the dataset.

streamlit()[source]#
tail(n: int = 5) DataPanel[source]#

Get the last n examples of the DataPanel.

to_jsonl(path: str) None[source]#

Save a Dataset to a jsonl file.

to_pandas() DataFrame[source]#

Convert a Dataset to a pandas DataFrame.

update(function: Callable | None = None, with_indices: bool = False, input_columns: str | List[str] | None = None, is_batched_fn: bool = False, batch_size: int | None = 1, remove_columns: List[str] | None = None, num_workers: int = 0, output_type: type | Dict[str, type] = None, mmap: bool = False, mmap_path: str = None, materialize: bool = True, pbar: bool = False, **kwargs) DataPanel[source]#

Update the columns of the dataset.

values()[source]#
write(path: str) None[source]#

Save a DataPanel to disk.

property columns#

Column names in the DataPanel.

property data: BlockManager#

Get the underlying data (excluding invisible rows).

To access underlying data with invisible rows, use _data.

logdir: Path = PosixPath('/home/docs/meerkat')#
property ncols#

Number of columns in the DataPanel.

property nrows#

Number of rows in the DataPanel.

property shape#

Shape of the DataPanel (num_rows, num_columns).

meerkat.errors module#

exception ConcatError[source]#

Bases: ValueError

exception ConcatWarning[source]#

Bases: RuntimeWarning

exception ConsolidationError[source]#

Bases: ValueError

exception ExperimentalWarning[source]#

Bases: FutureWarning

exception ImmutableError[source]#

Bases: ValueError

exception MergeError[source]#

Bases: ValueError

meerkat.provenance module#

class ProvenanceMixin(*args, **kwargs)[source]#

Bases: object

get_provenance(include_columns: bool = False, last_parent_only: bool = False)[source]#
property node#
class ProvenanceNode[source]#

Bases: object

add_child(node: ProvenanceNode, key: Tuple)[source]#
add_parent(node: ProvenanceNode, key: Tuple)[source]#
cache_repr()[source]#
get_provenance(last_parent_only: bool = False)[source]#
property children#
property last_parent#
property parents#
class ProvenanceObjNode(obj: ProvenanceMixin)[source]#

Bases: ProvenanceNode

class ProvenanceOpNode(fn: callable, inputs: dict, outputs: object, captured_args: dict)[source]#

Bases: ProvenanceNode

class provenance(enabled: bool = True)[source]#

Bases: object

capture_provenance(capture_args: Sequence[str] | None = None)[source]#
get_nested_objs(data)[source]#

Recursively get DataPanels and Columns from nested collections.

is_provenance_enabled()[source]#
set_provenance(enabled=True)[source]#
visualize_provenance(obj: ProvenanceObjNode | ProvenanceOpNode, show_columns: bool = False, last_parent_only: bool = False)[source]#

meerkat.version module#