diff --git a/changes/3781.feature.md b/changes/3781.feature.md new file mode 100644 index 0000000000..191ca5ed4a --- /dev/null +++ b/changes/3781.feature.md @@ -0,0 +1 @@ +Added `Struct` class (subclass of `Structured`) implementing the zarr-extensions `struct` dtype spec. Uses object-style field format and dict fill values. Legacy `Structured` remains available for backward compatibility. diff --git a/docs/user-guide/data_types.md b/docs/user-guide/data_types.md index aa19baf891..3cdafb5f28 100644 --- a/docs/user-guide/data_types.md +++ b/docs/user-guide/data_types.md @@ -229,6 +229,37 @@ here, it's possible to create it yourself: see [Adding New Data Types](#adding-n #### Struct-like - [Structured][zarr.dtype.Structured] +!!! note "Zarr V3 Structured Data Types" + + In Zarr V3, structured data types are specified using the `struct` extension defined in the + [zarr-extensions repository](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct). + The JSON representation uses an object format for fields: + + ```json + { + "name": "struct", + "configuration": { + "fields": [ + {"name": "x", "data_type": "float32"}, + {"name": "y", "data_type": "int64"} + ] + } + } + ``` + + For backward compatibility, Zarr Python also accepts the legacy `structured` name with + tuple-format fields when reading existing data. + + Fill values for structured types are represented as JSON objects mapping field names to values: + + ```json + {"x": 1.5, "y": 42} + ``` + + When using structured types with multi-byte fields, the `bytes` codec must specify an + explicit `endian` parameter. If omitted, Zarr Python assumes little-endian for legacy + compatibility but emits a warning. + ### Example Usage This section demonstrates the basic usage of Zarr data types.
diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 86bb354fb5..48f8b57c49 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +import warnings from dataclasses import dataclass, replace from enum import Enum from typing import TYPE_CHECKING @@ -9,6 +10,7 @@ from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.npy.structured import Struct if TYPE_CHECKING: from typing import Self @@ -56,7 +58,20 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - if not isinstance(array_spec.dtype, HasEndianness): + if isinstance(array_spec.dtype, Struct): + if array_spec.dtype.has_multi_byte_fields(): + if self.endian is None: + warnings.warn( + "Missing 'endian' for structured dtype with multi-byte fields. " + "Assuming little-endian for legacy compatibility.", + UserWarning, + stacklevel=2, + ) + return replace(self, endian=Endian.little) + else: + if self.endian is not None: + return replace(self, endian=None) + elif not isinstance(array_spec.dtype, HasEndianness): if self.endian is not None: return replace(self, endian=None) elif self.endian is None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4736805b9d..9c6c54c97a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -69,6 +69,7 @@ ) from zarr.core.config import config as zarr_config from zarr.core.dtype import ( + Structured, VariableLengthBytes, VariableLengthUTF8, ZDType, @@ -5291,10 +5292,13 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec: length strings and variable length bytes have hard-coded serializers -- ``VLenUTF8Codec`` and ``VLenBytesCodec``, respectively. 
+ Structured data types with multi-byte fields use ``BytesCodec`` with little-endian encoding. """ serializer: ArrayBytesCodec = BytesCodec(endian=None) - if isinstance(dtype, HasEndianness): + if isinstance(dtype, HasEndianness) or ( + isinstance(dtype, Structured) and dtype.has_multi_byte_fields() + ): serializer = BytesCodec(endian="little") elif isinstance(dtype, HasObjectCodec): if dtype.object_codec_id == "vlen-bytes": diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 7c7b0fc5c6..95af6f8389 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -21,7 +21,13 @@ from zarr.core.dtype.npy.complex import Complex64, Complex128 from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 -from zarr.core.dtype.npy.structured import Structured, StructuredJSON_V2, StructuredJSON_V3 +from zarr.core.dtype.npy.structured import ( + Struct, + StructJSON_V3, + Structured, + StructuredJSON_V2, + StructuredJSON_V3, +) from zarr.core.dtype.npy.time import ( DateTime64, DateTime64JSON_V2, @@ -75,6 +81,8 @@ "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", + "Struct", + "StructJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", @@ -124,7 +132,7 @@ | ComplexFloatDType | StringDType | BytesDType - | Structured + | Struct | TimeDType | VariableLengthBytes ) @@ -137,7 +145,7 @@ *COMPLEX_FLOAT_DTYPE, *STRING_DTYPE, *BYTES_DTYPE, - Structured, + Struct, *TIME_DTYPE, VariableLengthBytes, ) diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 8bedee07ef..d111924e6c 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -61,11 +61,10 @@ class StructuredJSON_V3( NamedConfig[Literal["structured"], dict[str, Sequence[Sequence[str | DTypeJSON]]]] ): """ - A JSON representation of a structured data type in Zarr V3. 
+ A JSON representation of a structured data type in Zarr V3 (legacy format). - References - ---------- - This representation is not currently defined in an external specification. + This is the legacy format using tuple-style field definitions. + For the canonical format, see ``StructJSON_V3``. Examples -------- @@ -83,14 +82,44 @@ class StructuredJSON_V3( """ +class StructJSON_V3( + NamedConfig[Literal["struct"], dict[str, Sequence[dict[str, str | DTypeJSON]]]] +): + """ + A JSON representation of a structured data type in Zarr V3 (canonical format). + + References + ---------- + The Zarr V3 specification for this data type is defined in the zarr-extensions repository: + https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct + + Examples + -------- + ```python + { + "name": "struct", + "configuration": { + "fields": [ + {"name": "f0", "data_type": "int32"}, + {"name": "f1", "data_type": "float64"}, + ] + } + } + ``` + """ + + @dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): """ - A Zarr data type for arrays containing structured scalars, AKA "record arrays". + A Zarr data type for arrays containing structured scalars, AKA "record arrays" (legacy format). Wraps the NumPy `np.dtypes.VoidDType` if the data type has fields. Scalars for this data type are instances of `np.void`, with a ``fields`` attribute. + This class handles the legacy "structured" format with tuple-style field definitions. + For the canonical "struct" format, see ``Struct``. + Attributes ---------- fields : Sequence[tuple[str, ZDType]] @@ -98,8 +127,6 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): References ---------- - This data type does not have a Zarr V3 specification. - The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). 
""" @@ -234,7 +261,6 @@ def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructuredJSON_V3]: True if the input is a valid JSON representation of a structured data type for Zarr V3, False otherwise. """ - return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -268,7 +294,6 @@ def _from_json_v2(cls, data: DTypeJSON) -> Self: @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: - # avoid circular import from zarr.core.dtype import get_data_type_from_json if cls._check_json_v3(data): @@ -445,7 +470,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: return cast("np.void", np.array([as_bytes]).view(dtype)[0]) raise TypeError(f"Invalid type: {data}. Expected a string.") - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str | dict[str, JSON]: """ Convert a scalar to a JSON-serializable string representation. @@ -458,9 +483,10 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: Returns ------- - str + str | dict[str, JSON] A string representation of the scalar, which is a base64-encoded - string of the bytes that make up the scalar. + string of the bytes that make up the scalar. Subclasses may return + a dict for V3 format. """ return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) @@ -475,3 +501,166 @@ def item_size(self) -> int: The size of a single scalar in bytes. """ return self.to_native_dtype().itemsize + + def has_multi_byte_fields(self) -> bool: + """ + Check if this structured dtype has any fields with item_size > 1. + + Returns + ------- + bool + True if any field has item_size > 1, False otherwise. 
+ """ + return any( + isinstance(field_dtype, HasItemSize) and field_dtype.item_size > 1 + for _, field_dtype in self.fields + ) + + +@dataclass(frozen=True, kw_only=True) +class Struct(Structured): + """ + A Zarr data type for arrays containing structured scalars, AKA "record arrays". + + Wraps the NumPy `np.dtypes.VoidDType` if the data type has fields. Scalars for this data + type are instances of `np.void`, with a ``fields`` attribute. + + This is the canonical data type registered for structured arrays. It reads both + the canonical ``"struct"`` format (object-style fields) and the legacy ``"structured"`` + format (tuple-style fields), but always writes the canonical ``"struct"`` format. + + Attributes + ---------- + fields : Sequence[tuple[str, ZDType]] + The fields of the structured dtype. + + References + ---------- + The Zarr V3 specification for this data type is defined in the zarr-extensions repository: + https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct + + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). 
+ """ + + _zarr_v3_name: ClassVar[Literal["struct"]] = "struct" # type: ignore[assignment] + + @classmethod + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructJSON_V3]: # type: ignore[override] + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] in ("struct", "structured") + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"fields"} + ) + + @classmethod + def _from_json_v3(cls, data: DTypeJSON) -> Self: + from zarr.core.dtype import get_data_type_from_json + + if cls._check_json_v3(data): + config = data["configuration"] + meta_fields = config["fields"] + parsed_fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] + for field in meta_fields: + if isinstance(field, dict): + f_name = field["name"] + f_dtype = field["data_type"] + else: + # Legacy tuple-style field format from "structured" dtype + f_name, f_dtype = field # type: ignore[unreachable] + parsed_fields.append((f_name, get_data_type_from_json(f_dtype, zarr_format=3))) # type: ignore[arg-type] + return cls(fields=tuple(parsed_fields)) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> StructuredJSON_V2: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> StructJSON_V3: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructJSON_V3: + if zarr_format == 2: + fields_v2 = [ + [f_name, f_dtype.to_json(zarr_format=zarr_format)["name"]] + for f_name, f_dtype in self.fields + ] + return {"name": fields_v2, "object_codec_id": None} + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + fields_v3 = [ + {"name": f_name, "data_type": f_dtype.to_json(zarr_format=zarr_format)} + for f_name, f_dtype in self.fields + ] + return cast( + "StructJSON_V3", + {"name": self._zarr_v3_name, "configuration": {"fields": fields_v3}}, + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + """ + Read a JSON-serializable value as a NumPy structured scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. Can be either: + - A dict mapping field names to values (primary format for V3) + - A base64-encoded string (legacy format, for backward compatibility) + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.void + The NumPy structured scalar. + + Raises + ------ + TypeError + If the input is not a dict or base64-encoded string. + """ + if isinstance(data, dict): + field_values = [] + for field_name, field_dtype in self.fields: + if field_name in data: + field_values.append( + field_dtype.from_json_scalar(data[field_name], zarr_format=zarr_format) + ) + else: + field_values.append(field_dtype.default_scalar()) + return self._cast_scalar_unchecked(tuple(field_values)) + elif check_json_str(data): + as_bytes = bytes_from_json(data, zarr_format=zarr_format) + dtype = self.to_native_dtype() + return cast("np.void", np.array([as_bytes]).view(dtype)[0]) + raise TypeError(f"Invalid type: {data}. 
Expected a dict or base64-encoded string.") + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str | dict[str, JSON]: + """ + Convert a scalar to a JSON-serializable representation. + + Parameters + ---------- + data : object + The scalar to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str | dict[str, JSON] + For V2: A base64-encoded string of the bytes that make up the scalar. + For V3: A dict mapping field names to their JSON-serialized values. + """ + scalar = self.cast_scalar(data) + if zarr_format == 2: + return bytes_to_json(scalar.tobytes(), zarr_format) + result: dict[str, JSON] = {} + for field_name, field_dtype in self.fields: + result[field_name] = field_dtype.to_json_scalar( + scalar[field_name], zarr_format=zarr_format + ) + return result diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py index 2c7eb651b0..f75219aab8 100644 --- a/src/zarr/dtype.py +++ b/src/zarr/dtype.py @@ -22,6 +22,8 @@ RawBytes, RawBytesJSON_V2, RawBytesJSON_V3, + Struct, + StructJSON_V3, Structured, StructuredJSON_V2, StructuredJSON_V3, @@ -68,6 +70,8 @@ "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", + "Struct", + "StructJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 0650d143c6..4c585bfdf6 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -6,14 +6,13 @@ from zarr.core.dtype import data_type_registry from zarr.core.dtype.common import HasLength -from zarr.core.dtype.npy.structured import Structured +from zarr.core.dtype.npy.structured import Struct from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType zdtype_examples: tuple[ZDType[Any, Any], ...] 
= () for wrapper_cls in data_type_registry.contents.values(): - # The Structured dtype has to be constructed with some actual fields - if wrapper_cls is Structured: + if wrapper_cls is Struct: with warnings.catch_warnings(): warnings.simplefilter("ignore") zdtype_examples += ( diff --git a/tests/test_dtype/test_npy/test_structured.py b/tests/test_dtype/test_npy/test_structured.py index e2cd2a6dfe..554c3b4e41 100644 --- a/tests/test_dtype/test_npy/test_structured.py +++ b/tests/test_dtype/test_npy/test_structured.py @@ -11,12 +11,16 @@ Float64, Int32, Int64, + Struct, Structured, + UInt8, ) -class TestStructured(BaseTestZDType): - test_cls = Structured +class TestStruct(BaseTestZDType): + """Test the canonical 'struct' dtype format.""" + + test_cls = Struct valid_dtype = ( np.dtype([("field1", np.int32), ("field2", np.float64)]), np.dtype([("field1", np.int64), ("field2", np.int32)]), @@ -32,29 +36,32 @@ class TestStructured(BaseTestZDType): ) valid_json_v3 = ( { - "name": "structured", + "name": "struct", "configuration": { "fields": [ - ["field1", "int32"], - ["field2", "float64"], + {"name": "field1", "data_type": "int32"}, + {"name": "field2", "data_type": "float64"}, ] }, }, { - "name": "structured", + "name": "struct", "configuration": { "fields": [ - [ - "field1", - { + { + "name": "field1", + "data_type": { "name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 1}, }, - ], - [ - "field2", - {"name": "fixed_length_utf32", "configuration": {"length_bytes": 32}}, - ], + }, + { + "name": "field2", + "data_type": { + "name": "fixed_length_utf32", + "configuration": {"length_bytes": 32}, + }, + }, ] }, }, @@ -65,7 +72,7 @@ class TestStructured(BaseTestZDType): ) invalid_json_v3 = ( { - "name": "structured", + "name": "struct", "configuration": { "fields": [ ("field1", {"name": "int32", "configuration": {"endianness": "invalid"}}), @@ -77,35 +84,38 @@ class TestStructured(BaseTestZDType): ) scalar_v2_params = ( - (Structured(fields=(("field1", 
Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), - (Structured(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), + (Struct(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), ) scalar_v3_params = ( - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), - (Structured(fields=(("field1", Int64()), ("field2", Int32()))), "AQAAAAAAAAAAAPA/"), + ( + Struct(fields=(("field1", Int32()), ("field2", Float64()))), + {"field1": 1, "field2": 1.0}, + ), + (Struct(fields=(("field1", Int64()), ("field2", Int32()))), {"field1": 1, "field2": 1}), ) cast_value_params = ( ( - Structured(fields=(("field1", Int32()), ("field2", Float64()))), + Struct(fields=(("field1", Int32()), ("field2", Float64()))), (1, 2.0), np.array((1, 2.0), dtype=[("field1", np.int32), ("field2", np.float64)]), ), ( - Structured(fields=(("field1", Int64()), ("field2", Int32()))), + Struct(fields=(("field1", Int64()), ("field2", Int32()))), (3, 4.5), np.array((3, 4.5), dtype=[("field1", np.int64), ("field2", np.int32)]), ), ) item_size_params = ( - Structured(fields=(("field1", Int32()), ("field2", Float64()))), - Structured(fields=(("field1", Int64()), ("field2", Int32()))), + Struct(fields=(("field1", Int32()), ("field2", Float64()))), + Struct(fields=(("field1", Int64()), ("field2", Int32()))), ) invalid_scalar_params = ( - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "i am a string"), - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), {"type": "dict"}), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), "i am a string"), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), {"type": "dict"}), ) def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: @@ -114,11 +124,139 @@ def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: return super().scalar_equals(scalar1, scalar2) +class 
TestStructured: + """Test the legacy 'structured' dtype format.""" + + def test_invalid_size(self) -> None: + """Test that it's impossible to create a data type that has no fields.""" + fields = () + msg = f"must have at least one field. Got {fields!r}" + with pytest.raises(ValueError, match=msg): + Structured(fields=fields) + + def test_structured_legacy_name_with_tuple_format(self) -> None: + """Test that the legacy 'structured' name with tuple field format is accepted.""" + json_v3 = { + "name": "structured", + "configuration": { + "fields": [ + ["field1", "int32"], + ["field2", "float64"], + ] + }, + } + dtype = Structured.from_json(json_v3, zarr_format=3) + assert dtype.fields[0][0] == "field1" + assert dtype.fields[1][0] == "field2" + + @pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning") + def test_structured_writes_tuple_format(self) -> None: + """Test that 'structured' writes the tuple field format.""" + dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + json_v3 = dtype.to_json(zarr_format=3) + assert json_v3["name"] == "structured" + assert json_v3["configuration"]["fields"][0] == ["field1", "int32"] + + def test_invalid_size() -> None: - """ - Test that it's impossible to create a data type that has no fields - """ + """Test that it's impossible to create a data type that has no fields.""" fields = () msg = f"must have at least one field. 
Got {fields!r}" with pytest.raises(ValueError, match=msg): - Structured(fields=fields) + Struct(fields=fields) + + +@pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning") +def test_struct_name_is_primary() -> None: + """Test that 'struct' is the primary name written to JSON.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) + json_v3 = dtype.to_json(zarr_format=3) + assert json_v3["name"] == "struct" + + +def test_struct_reads_legacy_tuple_format() -> None: + """Test that 'struct' dtype reads the legacy tuple field format.""" + json_v3 = { + "name": "struct", + "configuration": { + "fields": [ + ["field1", "int32"], + ["field2", "float64"], + ] + }, + } + dtype = Struct.from_json(json_v3, zarr_format=3) + assert isinstance(dtype, Struct) + assert dtype.fields[0][0] == "field1" + assert dtype.fields[1][0] == "field2" + + +def test_struct_reads_canonical_object_format() -> None: + """Test that 'struct' dtype reads the new object field format.""" + json_v3 = { + "name": "struct", + "configuration": { + "fields": [ + {"name": "field1", "data_type": "int32"}, + {"name": "field2", "data_type": "float64"}, + ] + }, + } + dtype = Struct.from_json(json_v3, zarr_format=3) + assert isinstance(dtype, Struct) + assert dtype.fields[0][0] == "field1" + assert dtype.fields[1][0] == "field2" + + +def test_fill_value_dict_form() -> None: + """Test that dict form fill values are properly parsed.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) + fill_value = dtype.from_json_scalar({"x": 42, "y": 3.14}, zarr_format=3) + assert fill_value["x"] == 42 + assert fill_value["y"] == 3.14 + + +def test_fill_value_dict_form_missing_fields() -> None: + """Test that missing fields in dict form fill values use defaults.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) + fill_value = dtype.from_json_scalar({"x": 42}, zarr_format=3) + assert fill_value["x"] == 42 + assert fill_value["y"] == 0.0 + + +def 
test_fill_value_legacy_base64() -> None: + """Test that legacy base64-encoded fill values are still readable.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) + fill_value = dtype.from_json_scalar("AQAAAAAAAAAAAPA/", zarr_format=3) + assert fill_value["field1"] == 1 + assert fill_value["field2"] == 1.0 + + +def test_fill_value_to_json_dict_form() -> None: + """Test that fill values are serialized as dict form.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) + scalar = np.array((42, 3.14), dtype=[("x", np.int32), ("y", np.float64)])[()] + json_val = dtype.to_json_scalar(scalar, zarr_format=3) + assert isinstance(json_val, dict) + assert json_val["x"] == 42 + assert json_val["y"] == 3.14 + + +def test_has_multi_byte_fields_true() -> None: + """Test that has_multi_byte_fields returns True for dtypes with multi-byte fields.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) + assert dtype.has_multi_byte_fields() is True + + +def test_has_multi_byte_fields_false() -> None: + """Test that has_multi_byte_fields returns False for dtypes with only single-byte fields.""" + dtype = Struct(fields=(("field1", UInt8()), ("field2", UInt8()))) + assert dtype.has_multi_byte_fields() is False + + +def test_struct_from_native_dtype() -> None: + """Test that Struct can be created from native numpy dtype.""" + dtype = np.dtype([("field1", np.int32), ("field2", np.float64)]) + struct = Struct.from_native_dtype(dtype) + assert struct.fields[0][0] == "field1" + assert struct.fields[1][0] == "field2" diff --git a/tests/test_v2.py b/tests/test_v2.py index cb990f6159..3a063ac509 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -14,8 +14,9 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype import FixedLengthUTF32, Structured, VariableLengthUTF8 +from zarr.core.dtype import FixedLengthUTF32, VariableLengthUTF8 from 
zarr.core.dtype.npy.bytes import NullTerminatedBytes +from zarr.core.dtype.npy.structured import Struct from zarr.core.dtype.wrapper import ZDType from zarr.core.group import Group from zarr.core.sync import sync @@ -283,7 +284,7 @@ def test_structured_dtype_roundtrip(fill_value: float | bytes, tmp_path: Path) - def test_parse_structured_fill_value_valid( fill_value: Any, dtype: np.dtype[Any], expected_result: Any ) -> None: - zdtype = Structured.from_native_dtype(dtype) + zdtype = Struct.from_native_dtype(dtype) result = zdtype.cast_scalar(fill_value) assert result.dtype == expected_result.dtype assert result == expected_result