import enum
import os
import sys
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, ClassVar, List, Optional, Tuple, Union
import numpy
from tiled.structures.root import Structure
from ..type_aliases import Chunks
# Shape of one entry in numpy's ``dtype.descr``: (name, dtype-string) or,
# for sub-array fields, (name, dtype-string, shape).
FieldDescr = Union[Tuple[str, str], Tuple[str, str, Tuple[int, ...]]]
NumpyDescr = List[FieldDescr]
class Endianness(str, enum.Enum):
    """
    An enum of endian values: big, little, not_applicable.
    """

    # Subclassing str makes members compare equal to (and serialize as)
    # their plain string values.
    big = "big"
    little = "little"
    not_applicable = "not_applicable"
class ObjectArrayTypeDisabled(ValueError):
    """
    Raised when a numpy 'object'-kind dtype ("O") is requested but support
    was not enabled via the ``TILED_ALLOW_OBJECT_ARRAYS`` environment
    variable at import time. See ``Kind._missing_``.
    """

    pass
class Kind(str, enum.Enum):
    """
    See https://numpy.org/devdocs/reference/arrays.interface.html#object.__array_interface__
    The term "kind" comes from the numpy API as well.
    Note: At import time, the environment variable ``TILED_ALLOW_OBJECT_ARRAYS``
    is checked. If it is set to anything other than ``"0"``, then this
    Enum gets an additional member::
        object = "O"
    to support numpy 'object'-type arrays which hold generic Python objects.
    Numpy 'object'-type arrays are not enabled by default because their binary
    representation is not interpretable by clients other than Python. It is
    recommended to convert your data to a non-object type if possible so that it
    can be read by non-Python clients.
    """

    bit_field = "t"
    boolean = "b"
    integer = "i"
    unsigned_integer = "u"
    floating_point = "f"
    complex_floating_point = "c"
    timedelta = "m"
    datetime = "M"
    string = "S"  # fixed-length sequence of char
    unicode = "U"  # fixed-length sequence of Py_UNICODE
    other = "V"  # "V" is for "void" -- generic fixed-size chunk of memory

    # By default, do not tolerate numpy object arrays
    if os.getenv("TILED_ALLOW_OBJECT_ARRAYS", "0") != "0":
        object = "O"  # Object (i.e. the memory contains a pointer to PyObject)

    @classmethod
    def _missing_(cls, key: str):
        # Give a helpful, specific error (rather than enum's generic
        # ValueError) when an 'object'-kind dtype is requested while object
        # arrays are disabled. For any other unknown key, returning None
        # lets the enum machinery raise its usual ValueError.
        if key == "O":
            raise ObjectArrayTypeDisabled(
                "Numpy 'object'-type arrays are not enabled by default "
                "because their binary representation is not interpretable "
                "by clients other than Python. "
                "It is recommended to convert your data to a non-object type "
                "if possible so that it can be read by non-Python clients. "
                "If this is not possible, you may enable 'object'-type arrays "
                "by setting the environment variable TILED_ALLOW_OBJECT_ARRAYS=1 "
                "in the server."
            )
@dataclass
class BuiltinDtype:
    """
    Describe a scalar (non-structured) numpy dtype in transportable terms.
    """

    endianness: Endianness
    kind: Kind
    itemsize: int  # size of one item, in bytes
    # Datetime/timedelta base units with optional count, e.g. "[ns]" or
    # "[10s]"; None for non-datetime kinds.
    dt_units: Optional[str] = None

    # Map numpy byteorder characters to Endianness names. "=" (native) is
    # resolved eagerly to this machine's byte order.
    _endianness_map: ClassVar[Mapping[str, str]] = {
        ">": "big",
        "<": "little",
        "=": sys.byteorder,
        "|": "not_applicable",
    }
    # Inverse mapping; "=" is excluded so that each endianness value maps
    # back to exactly one numpy character.
    _endianness_reverse_map: ClassVar[Mapping[str, str]] = {
        v: k for k, v in _endianness_map.items() if k != "="
    }

    @classmethod
    def from_numpy_dtype(cls, dtype: numpy.dtype) -> "BuiltinDtype":
        """Build a BuiltinDtype from a numpy dtype object."""
        # Extract datetime units from the dtype string representation,
        # e.g. `'<M8[ns]'` has `dt_units = '[ns]'`. Count determines the
        # number of base units in a step; numpy omits it when it is 1.
        dt_units = None
        if dtype.kind in ("m", "M"):
            unit, count = numpy.datetime_data(dtype)
            dt_units = f"[{count if count > 1 else ''}{unit}]"
        return cls(
            # Coerce to the enum member, consistent with from_json();
            # the map stores plain strings.
            endianness=Endianness(cls._endianness_map[dtype.byteorder]),
            kind=Kind(dtype.kind),
            itemsize=dtype.itemsize,
            dt_units=dt_units,
        )

    def to_numpy_dtype(self) -> numpy.dtype:
        """Reconstruct the equivalent numpy dtype."""
        return numpy.dtype(self.to_numpy_str())

    def to_numpy_str(self) -> str:
        """Render as a numpy dtype string, e.g. '<f8' or '<M8[ns]'."""
        endianness = self._endianness_reverse_map[self.endianness]
        # dtype.itemsize always reports bytes. For the numeric types the
        # string format is {type_code}{byte_count}, so we can directly use
        # the item size.
        #
        # For unicode, however, the pattern is 'U{char_count}', and numpy
        # always represents text as 4-byte UCS4 encoding (because variable
        # width encodings do not fit with fixed-stride arrays), so the
        # reported itemsize is 4x the char count. To get back to the string
        # we need to divide by 4.
        size = self.itemsize if self.kind != Kind.unicode else self.itemsize // 4
        return f"{endianness}{self.kind.value}{size}{self.dt_units or ''}"

    def to_numpy_descr(self):
        "An alias for to_numpy_str() to match the StructDtype interface."
        return self.to_numpy_str()

    @classmethod
    def from_json(cls, structure: Mapping[str, Any]) -> "BuiltinDtype":
        """Build a BuiltinDtype from its JSON (dict) representation."""
        return cls(
            kind=Kind(structure["kind"]),
            itemsize=structure["itemsize"],
            endianness=Endianness(structure["endianness"]),
            dt_units=structure.get("dt_units"),
        )
@dataclass
class Field:
    """
    One named field of a structured dtype: a name, a dtype, and an optional
    inner shape (present only for sub-array fields).
    """

    name: str
    dtype: Union[BuiltinDtype, "StructDtype"]
    shape: Optional[Tuple[int, ...]]

    @classmethod
    def from_numpy_descr(cls, field: FieldDescr) -> "Field":
        """Build a Field from one entry of numpy's ``dtype.descr``."""
        field_name = field[0]
        if field_name == "":
            # An empty name indicates descr of a base/subdtype, not a field.
            raise ValueError(
                f"You seem to have gotten descr of a base or subdtype: {field}"
            )
        remainder = field[1:]
        if len(remainder) == 1:
            (type_spec,) = remainder
            inner_shape = None
        else:
            type_spec, inner_shape = remainder
        # A plain string spec is a builtin dtype; a nested descr (list) is
        # a structured dtype.
        if isinstance(type_spec, str):
            parsed_dtype = BuiltinDtype.from_numpy_dtype(numpy.dtype(type_spec))
        else:
            parsed_dtype = StructDtype.from_numpy_dtype(numpy.dtype(type_spec))
        return cls(name=field_name, dtype=parsed_dtype, shape=inner_shape)

    def to_numpy_descr(self) -> FieldDescr:
        """Render this field as one entry of a numpy ``dtype.descr`` list."""
        if isinstance(self.dtype, BuiltinDtype):
            type_part = self.dtype.to_numpy_str()
        else:
            type_part = self.dtype.to_numpy_descr()
        if self.shape is None:
            return (self.name, type_part)
        return (self.name, type_part, self.shape)

    @classmethod
    def from_json(cls, structure: Mapping[str, Any]) -> "Field":
        """Build a Field from its JSON (dict) representation."""
        dtype_doc = structure["dtype"]
        # A "fields" key marks a nested structured dtype.
        if "fields" in dtype_doc:
            parsed_dtype = StructDtype.from_json(dtype_doc)
        else:
            parsed_dtype = BuiltinDtype.from_json(dtype_doc)
        return cls(
            name=structure["name"], dtype=parsed_dtype, shape=structure["shape"]
        )
@dataclass
class StructDtype:
    """
    Describe a structured (record) numpy dtype: the total itemsize plus a
    list of named fields.
    """

    itemsize: int
    fields: List[Field]

    @classmethod
    def from_numpy_dtype(cls, dtype: numpy.dtype) -> "StructDtype":
        """Build a StructDtype from a structured numpy dtype."""
        # Subdtypes push extra dimensions into arrays; those should be
        # handled a layer up and reported as an array with bigger dimensions.
        if dtype.subdtype is not None:
            raise ValueError(f"We do not know how to encode subdtypes: {dtype}")
        # If this is a builtin type, require the use of BuiltinDtype
        # (nee .array.BuiltinDtype).
        if dtype.fields is None:
            raise ValueError(f"You have a base type: {dtype}")
        parsed_fields = [Field.from_numpy_descr(entry) for entry in dtype.descr]
        return cls(itemsize=dtype.itemsize, fields=parsed_fields)

    def to_numpy_dtype(self) -> numpy.dtype:
        """Reconstruct the equivalent structured numpy dtype."""
        return numpy.dtype(self.to_numpy_descr())

    def to_numpy_descr(self) -> NumpyDescr:
        """Render as a numpy ``dtype.descr``-style list of field entries."""
        return [each.to_numpy_descr() for each in self.fields]

    def max_depth(self) -> int:
        """Return the nesting depth: 1 plus the deepest nested StructDtype."""
        per_field_depths = (
            1 if isinstance(f.dtype, BuiltinDtype) else 1 + f.dtype.max_depth()
            for f in self.fields
        )
        return max(per_field_depths)

    @classmethod
    def from_json(cls, structure: Mapping[str, Any]) -> "StructDtype":
        """Build a StructDtype from its JSON (dict) representation."""
        return cls(
            itemsize=structure["itemsize"],
            fields=[Field.from_json(doc) for doc in structure["fields"]],
        )
@dataclass
class ArrayStructure(Structure):
    """
    Describe an array: its dtype, chunking, shape, and optional dim names.
    """

    data_type: Union[BuiltinDtype, StructDtype]
    chunks: Chunks  # tuple-of-tuples-of-ints like ((3,), (3,))
    shape: Tuple[int, ...]  # tuple of ints like (3, 3)
    dims: Optional[Tuple[str, ...]] = None  # None or tuple of names like ("x", "y")
    resizable: Union[bool, Tuple[bool, ...]] = False

    @classmethod
    def from_json(cls, structure: Mapping[str, Any]) -> "ArrayStructure":
        """Build an ArrayStructure from its JSON (dict) representation."""
        # A "fields" key marks a structured dtype; otherwise it is builtin.
        if "fields" in structure["data_type"]:
            data_type = StructDtype.from_json(structure["data_type"])
        else:
            data_type = BuiltinDtype.from_json(structure["data_type"])
        dims = structure["dims"]
        if dims is not None:
            dims = tuple(dims)
        return cls(
            data_type=data_type,
            chunks=tuple(map(tuple, structure["chunks"])),
            shape=tuple(structure["shape"]),
            dims=dims,
            resizable=structure.get("resizable", False),
        )

    @classmethod
    def from_array(cls, array, shape=None, chunks=None, dims=None) -> "ArrayStructure":
        """
        Build an ArrayStructure describing ``array``.

        Parameters
        ----------
        array : array-like
            Anything with ``__array__`` (else it is coerced via
            ``numpy.asanyarray``).
        shape : tuple, optional
            Override for ``array.shape``; see comment below.
        chunks : optional
            Chunking spec accepted by dask's ``normalize_chunks``; defaults
            to the array's own ``chunks`` attribute or "auto".
        dims : tuple of str, optional
            Dimension names.
        """
        from dask.array.core import normalize_chunks

        if not hasattr(array, "__array__"):
            # may be a list of something; convert to array
            array = numpy.asanyarray(array)
        # Why would shape ever be different from array.shape, you ask?
        # Some formats (notably Zarr) force shape to be a multiple of
        # a chunk size, such that array.shape may include a margin beyond the
        # actual data.
        if shape is None:
            shape = array.shape
        if chunks is None:
            # Dask (and some other) array types carry their own chunking.
            chunks = getattr(array, "chunks", None)  # might still be None
        if chunks is None:
            chunks = ("auto",) * len(shape)
        normalized_chunks = normalize_chunks(
            chunks,
            shape=shape,
            dtype=array.dtype,
        )
        if array.dtype.fields is not None:
            data_type = StructDtype.from_numpy_dtype(array.dtype)
        else:
            data_type = BuiltinDtype.from_numpy_dtype(array.dtype)
        return cls(
            data_type=data_type,
            shape=shape,
            chunks=normalized_chunks,
            dims=dims,
        )