Source code for meerkat.columns.cell_column

from __future__ import annotations

import logging
from typing import Sequence

import cytoolz as tz
import numpy as np

from meerkat.cells.abstract import AbstractCell
from meerkat.columns.abstract import AbstractColumn

logger = logging.getLogger(__name__)


class CellColumn(AbstractColumn):
    """Column whose underlying data is a sequence of cells.

    Each element is expected to expose a ``get()`` method; that method is
    called whenever an element (or a batch of elements) is materialized.
    """

    def __init__(
        self,
        cells: Sequence[AbstractCell] = None,
        *args,
        **kwargs,
    ):
        """Create the column, handing ``cells`` to the parent as ``data``.

        Args:
            cells: The cells to store in the column.
            *args: Extra positional arguments forwarded to the parent
                constructor.
            **kwargs: Extra keyword arguments forwarded to the parent
                constructor.
        """
        # Same call as `__init__(data=cells, *args, **kwargs)`; the unpacked
        # positionals are written first because that is how Python binds them.
        super().__init__(*args, data=cells, **kwargs)

    def _get_cell(self, index: int, materialize: bool = True):
        """Return the element at ``index``.

        Args:
            index: Position of the cell in ``self._data``.
            materialize: If true, return the result of the cell's ``get()``;
                otherwise return the cell object itself.
        """
        cell = self._data[index]
        return cell.get() if materialize else cell

    def _get_batch(self, indices: np.ndarray, materialize: bool = True):
        """Return the elements at ``indices``.

        Args:
            indices: Positions of the cells in ``self._data``.
            materialize: If true, call ``get()`` on every selected cell and
                pass the resulting list through ``self.collate``; otherwise
                return a plain list of the selected cell objects.
        """
        if not materialize:
            return [self._data[i] for i in indices]

        # if materializing, return a batch (by default, a list of objects
        # returned by `.get`, otherwise the batch format specified by
        # `self.collate`)
        return self.collate([self._data[i].get() for i in indices])

    def _get(self, index, materialize: bool = True, _data: np.ndarray = None):
        """Fetch a single element or a sub-column.

        Args:
            index: Any index accepted by ``self._translate_index``.
            materialize: Whether cells should be materialized via ``get()``.
            _data: Pre-fetched data for ``index``. When given, it is used
                instead of reading from the column (support for blocks).

        Returns:
            For an ``int`` index, the element itself (or ``_data`` if it was
            supplied). For an ``np.ndarray`` index, a column wrapping the
            selected batch.

        Raises:
            TypeError: If the translated index is neither an ``int`` nor an
                ``np.ndarray``.
        """
        index = self._translate_index(index)

        if isinstance(index, int):
            if _data is None:
                _data = self._get_cell(index, materialize=materialize)
            return _data

        if isinstance(index, np.ndarray):
            # support for blocks
            if _data is None:
                _data = self._get_batch(index, materialize=materialize)
            if materialize:
                # materialize could change the data in unknown ways, cannot
                # clone
                return self.__class__.from_data(data=_data)
            return self._clone(data=_data)

        # Previously this case fell off the end and returned None, which hid
        # the problem from the caller.
        # NOTE(review): assumes `_translate_index` only ever yields `int` or
        # `np.ndarray` for valid input - confirm against AbstractColumn.
        raise TypeError(
            f"Unsupported index type after translation: {type(index).__name__}"
        )

    @classmethod
    def from_cells(cls, cells: Sequence[AbstractCell], *args, **kwargs):
        """Alternate constructor: build a column of type ``cls`` from cells.

        Args:
            cells: The cells to store in the new column.
            *args: Extra positional arguments forwarded to ``cls``.
            **kwargs: Extra keyword arguments forwarded to ``cls``.
        """
        # `cells` stays a keyword argument so subclasses whose constructor
        # orders its parameters differently keep working.
        return cls(*args, cells=cells, **kwargs)

    @property
    def cells(self):
        """The column's ``data`` attribute, exposed under the name ``cells``."""
        return self.data

    def _repr_cell(self, index) -> object:
        """Return the ``repr`` of the element obtained through ``self.lz``."""
        return repr(self.lz[index])

    @staticmethod
    def concat(columns: Sequence[CellColumn]):
        """Concatenate several columns into one.

        The result is built with ``from_cells`` on the class of the first
        column, from the chained ``data`` of every column in order.

        Args:
            columns: Columns to concatenate. Must contain at least one
                column, because the result type is taken from
                ``columns[0]``.
        """
        merged = list(tz.concat(column.data for column in columns))
        return columns[0].__class__.from_cells(merged)

    def is_equal(self, other: AbstractColumn) -> bool:
        """Return whether ``other`` has the same class, length and elements.

        Elements are compared pairwise with ``==`` through the ``lz``
        accessor of both columns. The comparison stops at the first pair
        that differs.
        """
        if self.__class__ != other.__class__:
            return False
        if len(self) != len(other):
            return False
        # A generator (not a list) lets `all` stop at the first mismatch
        # instead of comparing every remaining pair of elements.
        return all(self.lz[idx] == other.lz[idx] for idx in range(len(self)))