Source code for tiled.structures.dataframe

from dataclasses import dataclass
from typing import List, Tuple, Union


[docs]@dataclass class DataFrameMicroStructure: meta: bytes # Arrow-encoded DataFrame divisions: bytes # Arrow-encoded DataFrame @classmethod def from_dask_dataframe(cls, ddf): # Make an *empty* DataFrame with the same structure as ddf. # TODO Look at make_meta_nonempty to see if the "objects" are str or # datetime or actually generic objects. import dask.dataframe.utils import pandas from ..serialization.dataframe import serialize_arrow meta = bytes(serialize_arrow(dask.dataframe.utils.make_meta(ddf), {})) divisions = bytes( serialize_arrow(pandas.DataFrame({"divisions": list(ddf.divisions)}), {}) ) return cls(meta=meta, divisions=divisions) @classmethod def from_dataframe(cls, df): # Make an *empty* DataFrame with the same structure as ddf. # TODO Look at make_meta_nonempty to see if the "objects" are str or # datetime or actually generic objects. import dask.dataframe import dask.dataframe.utils import pandas from ..serialization.dataframe import serialize_arrow ddf = dask.dataframe.from_pandas(df, npartitions=1) meta = bytes(serialize_arrow(dask.dataframe.utils.make_meta(ddf), {})) divisions = bytes( serialize_arrow(pandas.DataFrame({"divisions": list(ddf.divisions)}), {}) ) return cls(meta=meta, divisions=divisions) @property def meta_decoded(self): from ..serialization.dataframe import deserialize_arrow return deserialize_arrow(self.meta) @property def divisions_decoded(self): from ..serialization.dataframe import deserialize_arrow divisions_wrapped_in_df = deserialize_arrow(self.divisions) return tuple(divisions_wrapped_in_df["divisions"].values)
[docs]@dataclass class DataFrameMacroStructure: npartitions: int columns: List[str] resizable: Union[bool, Tuple[bool, ...]] = False @classmethod def from_dask_dataframe(cls, ddf): return cls(npartitions=ddf.npartitions, columns=list(ddf.columns))
[docs]@dataclass class DataFrameStructure: micro: DataFrameMicroStructure macro: DataFrameMacroStructure @classmethod def from_json(cls, content): return cls( micro=DataFrameMicroStructure(**content["micro"]), macro=DataFrameMacroStructure(**content["macro"]), )