diff --git a/changes/3826.feature.md b/changes/3826.feature.md new file mode 100644 index 0000000000..41cc555a92 --- /dev/null +++ b/changes/3826.feature.md @@ -0,0 +1 @@ +Added a `subchunk_write_order` option to `ShardingCodec` to allow for `morton`, `unordered`, `lexicographic`, and `colexicographic` subchunk orderings. \ No newline at end of file diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index 0e0fa3cd55..2a083af6bf 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -113,6 +113,13 @@ bytes within chunks of an array may improve the compression ratio, depending on the structure of the data, the compression algorithm used, and which compression filters (e.g., byte-shuffle) have been applied. +### Subchunk memory layout + +The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec`. That parameter is a string that must be one of `["morton", "lexicographic", "colexicographic", "unordered"]`. + +By default, [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order provides good spatial locality; however, [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order), for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired. The options are `lexicographic`, `morton`, `unordered` (i.e., random), and `colexicographic`. 
+ + ### Empty chunks It is possible to configure how Zarr handles the storage of chunks that are "empty" diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index 4c621290e7..8dc67737e6 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -27,7 +27,7 @@ Zlib, Zstd, ) -from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation, SubchunkWriteOrder from zarr.codecs.transpose import TransposeCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec @@ -43,6 +43,7 @@ "GzipCodec", "ShardingCodec", "ShardingCodecIndexLocation", + "SubchunkWriteOrder", "TransposeCodec", "VLenBytesCodec", "VLenUTF8Codec", diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 9f26bc57b1..01b0dc0d3d 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -5,7 +5,7 @@ from enum import Enum from functools import lru_cache from operator import itemgetter -from typing import TYPE_CHECKING, Any, NamedTuple, cast +from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast import numpy as np import numpy.typing as npt @@ -47,8 +47,6 @@ BasicIndexer, ChunkProjection, SelectorTuple, - _morton_order, - _morton_order_keys, c_order_iter, get_indexer, morton_order_iter, @@ -59,7 +57,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Self + from typing import Final, Self from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -78,6 +76,15 @@ class ShardingCodecIndexLocation(Enum): end = "end" +SubchunkWriteOrder = Literal["morton", "unordered", "lexicographic", "colexicographic"] +SUBCHUNK_WRITE_ORDER: Final[tuple[str, str, str, str]] = ( + "morton", + "unordered", + "lexicographic", + "colexicographic", +) + + def parse_index_location(data: object) -> ShardingCodecIndexLocation: return 
parse_enum(data, ShardingCodecIndexLocation) @@ -227,6 +234,7 @@ def create_empty(cls, chunks_per_shard: tuple[int, ...]) -> _ShardIndex: class _ShardReader(ShardMapping): buf: Buffer index: _ShardIndex + order: SubchunkWriteOrder @classmethod async def from_bytes( @@ -283,15 +291,13 @@ def to_dict_vectorized( dict mapping chunk coordinate tuples to Buffer or None """ starts, ends, valid = self.index.get_chunk_slices_vectorized(chunk_coords_array) - chunks_per_shard = tuple(self.index.offsets_and_lengths.shape[:-1]) - chunk_coords_keys = _morton_order_keys(chunks_per_shard) result: dict[tuple[int, ...], Buffer | None] = {} - for i, coords in enumerate(chunk_coords_keys): + for i, coords in enumerate(chunk_coords_array): if valid[i]: - result[coords] = self.buf[int(starts[i]) : int(ends[i])] + result[tuple(coords.ravel())] = self.buf[int(starts[i]) : int(ends[i])] else: - result[coords] = None + result[tuple(coords.ravel())] = None return result @@ -305,7 +311,9 @@ class ShardingCodec( chunk_shape: tuple[int, ...] codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] + rng: np.random.Generator | None index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end + subchunk_write_order: SubchunkWriteOrder = "morton" def __init__( self, @@ -314,16 +322,24 @@ def __init__( codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, + subchunk_write_order: SubchunkWriteOrder = "morton", + rng: np.random.Generator | None = None, ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) index_codecs_parsed = parse_codecs(index_codecs) index_location_parsed = parse_index_location(index_location) + if subchunk_write_order not in SUBCHUNK_WRITE_ORDER: + raise ValueError( + f"Unrecognized subchunk write order: {subchunk_write_order}. 
Only {SUBCHUNK_WRITE_ORDER} are allowed." + ) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) + object.__setattr__(self, "subchunk_write_order", subchunk_write_order) + object.__setattr__(self, "rng", rng) # Use instance-local lru_cache to avoid memory leaks @@ -336,7 +352,7 @@ def __init__( # todo: typedict return type def __getstate__(self) -> dict[str, Any]: - return self.to_dict() + return {"rng": self.rng, **self.to_dict()} def __setstate__(self, state: dict[str, Any]) -> None: config = state["configuration"] @@ -344,6 +360,7 @@ def __setstate__(self, state: dict[str, Any]) -> None: object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + object.__setattr__(self, "rng", state["rng"]) # Use instance-local lru_cache to avoid memory leaks # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) @@ -523,6 +540,22 @@ async def _decode_partial_single( else: return out + def _subchunk_order_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[int, ...]]: + match self.subchunk_write_order: + case "morton": + subchunk_iter = morton_order_iter(chunks_per_shard) + case "lexicographic": + subchunk_iter = np.ndindex(chunks_per_shard) + case "colexicographic": + subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1])) + case "unordered": + subchunk_list = list(np.ndindex(chunks_per_shard)) + (self.rng if self.rng is not None else np.random.default_rng()).shuffle( + subchunk_list + ) + subchunk_iter = iter(subchunk_list) + return subchunk_iter + async def _encode_single( self, shard_array: NDBuffer, @@ -540,8 +573,11 @@ async def _encode_single( 
chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) ) - - shard_builder = dict.fromkeys(morton_order_iter(chunks_per_shard)) + shard_builder = dict.fromkeys(np.array(list(np.ndindex(chunks_per_shard)))) + assert ( + shard_builder.keys() + == dict.fromkeys(self._subchunk_order_iter(chunks_per_shard)).keys() + ) await self.codec_pipeline.write( [ @@ -582,7 +618,7 @@ async def _encode_partial_single( ) if self._is_complete_shard_write(indexer, chunks_per_shard): - shard_dict = dict.fromkeys(morton_order_iter(chunks_per_shard)) + shard_dict = dict.fromkeys(np.ndindex(chunks_per_shard)) else: shard_reader = await self._load_full_shard_maybe( byte_getter=byte_setter, @@ -592,7 +628,7 @@ async def _encode_partial_single( shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) # Use vectorized lookup for better performance shard_dict = shard_reader.to_dict_vectorized( - np.asarray(_morton_order(chunks_per_shard)) + np.array(list(np.ndindex(chunks_per_shard))) ) await self.codec_pipeline.write( @@ -631,7 +667,7 @@ async def _encode_shard_dict( template = buffer_prototype.buffer.create_zero_length() chunk_start = 0 - for chunk_coords in morton_order_iter(chunks_per_shard): + for chunk_coords in self._subchunk_order_iter(chunks_per_shard): value = map.get(chunk_coords) if value is None: continue diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 330f220b56..69471e7e2e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -13,7 +13,9 @@ import zarr from zarr.abc.store import RangeByteRequest, Store from zarr.codecs.bytes import BytesCodec -from zarr.core.array import Array +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.sharding import SUBCHUNK_WRITE_ORDER, ShardingCodec, SubchunkWriteOrder +from zarr.core.array import Array, CompressorsLike, SerializerLike from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding from 
zarr.core.common import JSON, ZarrFormat @@ -128,6 +130,9 @@ def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None | return draw(st.none() | st.lists(st.none() | simple_text, min_size=ndim, max_size=ndim)) # type: ignore[arg-type] +subchunk_write_orders: st.SearchStrategy[SubchunkWriteOrder] = st.sampled_from(SUBCHUNK_WRITE_ORDER) + + @st.composite def array_metadata( draw: st.DrawFn, @@ -249,6 +254,7 @@ def arrays( arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, zarr_formats: st.SearchStrategy = zarr_formats, + subchunk_write_orders: SearchStrategy[SubchunkWriteOrder] = subchunk_write_orders, ) -> AnyArray: store = draw(stores, label="store") path = draw(paths, label="array parent") @@ -260,12 +266,22 @@ def arrays( nparray = draw(arrays, label="array data") chunk_shape = draw(chunk_shapes(shape=nparray.shape), label="chunk shape") dim_names: None | list[str | None] = None + serializer: SerializerLike = "auto" + compressors_unsearched: CompressorsLike = "auto" if zarr_format == 3 and all(c > 0 for c in chunk_shape): shard_shape = draw( st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape), label="shard shape", ) dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names") + subchunk_write_order = draw(subchunk_write_orders) + serializer = ShardingCodec( + subchunk_write_order=subchunk_write_order, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + chunk_shape=chunk_shape, + ) + compressors_unsearched = None else: shard_shape = None # test that None works too. 
@@ -284,9 +300,10 @@ def arrays( shards=shard_shape, dtype=nparray.dtype, attributes=attributes, - # compressor=compressor, # FIXME + compressors=compressors_unsearched, # FIXME fill_value=fill_value, dimension_names=dim_names, + serializer=serializer, ) assert isinstance(a, Array) @@ -298,7 +315,8 @@ def arrays( assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunk_shape == a.chunks - assert shard_shape == a.shards + if shard_shape is not None: + assert shard_shape == a.shards assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index d7cbeb5bdb..062c2d4f1c 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -1,6 +1,6 @@ import pickle import re -from typing import Any +from typing import Any, get_args import numpy as np import numpy.typing as npt @@ -13,12 +13,15 @@ from zarr.abc.store import Store from zarr.codecs import ( BloscCodec, + BytesCodec, + Crc32cCodec, ShardingCodec, ShardingCodecIndexLocation, TransposeCodec, ) +from zarr.codecs.sharding import SubchunkWriteOrder, _ShardReader from zarr.core.buffer import NDArrayLike, default_buffer_prototype -from zarr.storage import StorePath, ZipStore +from zarr.storage import MemoryStore, StorePath, ZipStore from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim @@ -555,3 +558,133 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: s3 = sharded[0:5, 1, 0:3] assert c3.shape == s3.shape == (5, 3) # type: ignore[union-attr] np.testing.assert_array_equal(c3, s3) + + +async def stored_data_and_get_order( + codec: ShardingCodec, chunks_per_shard: tuple[int, ...] 
+) -> list[tuple[int, ...]]: + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, codec.chunk_shape, strict=True)) + store = MemoryStore() + arr = zarr.create_array( + StorePath(store), + shape=shard_shape, + dtype="uint8", + chunks=shard_shape, + serializer=codec, + filters=None, + compressors=None, + fill_value=0, + ) + + arr[:] = np.arange(np.prod(shard_shape), dtype="uint8").reshape(shard_shape) + + shard_buf = await store.get("c/0/0", prototype=default_buffer_prototype()) + if shard_buf is None: + raise RuntimeError("data write failed") + index = (await _ShardReader.from_bytes(shard_buf, codec, chunks_per_shard)).index + offset_to_coord: dict[int, tuple[int, ...]] = dict( + zip( + index.get_chunk_slices_vectorized(np.array(list(np.ndindex(chunks_per_shard))))[ + 0 + ], # start + list(np.ndindex(chunks_per_shard)), # coord + strict=True, + ) + ) + + # The physical write order is recovered by sorting coordinates by start offset. + return [coord for _, coord in sorted(offset_to_coord.items())] + + +@pytest.mark.parametrize( + "subchunk_write_order", + get_args(SubchunkWriteOrder), +) +async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None: + """Subchunks must be physically laid out in the shard in the order specified by + ``subchunk_write_order``. We verify this by decoding the shard index and sorting + the chunk coordinates by their byte offset.""" + # Use a non-square chunks_per_shard so all three orderings are distinguishable. 
+ chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + ) + + actual_order = await stored_data_and_get_order(codec, chunks_per_shard) + if subchunk_write_order != "unordered": + expected_order = list(codec._subchunk_order_iter(chunks_per_shard)) + assert actual_order == expected_order + else: + same_order_same_seed = list( + ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + )._subchunk_order_iter(chunks_per_shard) + ) + assert actual_order == same_order_same_seed + + +async def test_unordered_can_be_seeded() -> None: + orders = [] + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + for _ in range(4): + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order="unordered", + rng=np.random.default_rng(seed=seed), + ) + # The physical write order is recovered by sorting coordinates by start offset. 
+ orders.append(await stored_data_and_get_order(codec, chunks_per_shard)) + assert all(orders[0] == o for o in orders) + + +@pytest.mark.parametrize( + "subchunk_write_order", + get_args(SubchunkWriteOrder), +) +@pytest.mark.parametrize("do_partial", [True, False], ids=["partial", "complete"]) +def test_subchunk_write_order_roundtrip( + subchunk_write_order: SubchunkWriteOrder, do_partial: bool +) -> None: + """Data written with any ``subchunk_write_order`` must round-trip correctly.""" + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, strict=True)) + data = np.arange(np.prod(shard_shape), dtype="uint16").reshape(shard_shape) + arr = zarr.create_array( + StorePath(MemoryStore()), + shape=shard_shape, + dtype=data.dtype, + chunks=shard_shape, + serializer=ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + subchunk_write_order=subchunk_write_order, + ), + filters=None, + compressors=None, + fill_value=0, + ) + if do_partial: + sub_data = data[: (shard_shape[0] // 2)] + arr[: (shard_shape[0] // 2)] = data[: (shard_shape[0] // 2)] + data = np.vstack([sub_data, np.zeros_like(sub_data)]) + else: + arr[:] = data + np.testing.assert_array_equal(arr[:], data)