From 5477d70190ed0933ff7551785c954b56962504d5 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 11:53:52 +0100 Subject: [PATCH 1/9] feat: subchunk write order --- src/zarr/codecs/sharding.py | 46 +++++++++++++-- tests/test_codecs/test_sharding.py | 92 +++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 6 deletions(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 85162c2f74..2d59484391 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -1,5 +1,6 @@ from __future__ import annotations +import random from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import dataclass, replace from enum import Enum @@ -46,7 +47,6 @@ from zarr.core.indexing import ( BasicIndexer, SelectorTuple, - _morton_order, _morton_order_keys, c_order_iter, get_indexer, @@ -77,10 +77,27 @@ class ShardingCodecIndexLocation(Enum): end = "end" +class SubchunkWriteOrder(Enum): + """ + Enum for the order of the chunks within a shard. + + unordered is implemented via `random.shuffle` over the lexicographic order. + """ + + morton = "morton" + unordered = "unordered" + lexicographic = "lexicographic" + colexicographic = "colexicographic" + + def parse_index_location(data: object) -> ShardingCodecIndexLocation: return parse_enum(data, ShardingCodecIndexLocation) +def parse_subchunk_write_order(data: object) -> SubchunkWriteOrder: + return parse_enum(data, SubchunkWriteOrder) + + @dataclass(frozen=True) class _ShardingByteGetter(ByteGetter): shard_dict: ShardMapping @@ -305,6 +322,7 @@ class ShardingCodec( codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end + subchunk_write_order: SubchunkWriteOrder = SubchunkWriteOrder.morton def __init__( self, @@ -313,16 +331,19 @@ def __init__( codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, + subchunk_write_order: SubchunkWriteOrder | str = SubchunkWriteOrder.morton, ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) index_codecs_parsed = parse_codecs(index_codecs) index_location_parsed = parse_index_location(index_location) + subchunk_write_order_parsed = parse_subchunk_write_order(subchunk_write_order) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) + object.__setattr__(self, "subchunk_write_order", subchunk_write_order_parsed) # Use instance-local lru_cache to avoid memory leaks @@ -522,6 +543,20 @@ async def _decode_partial_single( else: return out + def _subchunk_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[int, ...]]: + match self.subchunk_write_order: + case SubchunkWriteOrder.morton: + subchunk_iter = morton_order_iter(chunks_per_shard) + case SubchunkWriteOrder.lexicographic: + subchunk_iter = np.ndindex(chunks_per_shard) + case SubchunkWriteOrder.colexicographic: + subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1])) + case SubchunkWriteOrder.unordered: + subchunk_list = list(np.ndindex(chunks_per_shard)) + random.shuffle(subchunk_list) + subchunk_iter = iter(subchunk_list) + return subchunk_iter + async def _encode_single( self, shard_array: NDBuffer, @@ -539,8 +574,7 @@ async def _encode_single( chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) ) - - shard_builder = dict.fromkeys(morton_order_iter(chunks_per_shard)) + shard_builder = dict.fromkeys(self._subchunk_iter(chunks_per_shard)) await self.codec_pipeline.write( [ @@ -581,7 +615,9 @@ async def _encode_partial_single( ) shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) # Use vectorized lookup for better performance - shard_dict = shard_reader.to_dict_vectorized(np.asarray(_morton_order(chunks_per_shard))) + shard_dict = shard_reader.to_dict_vectorized( + np.asarray(list(self._subchunk_iter(chunks_per_shard))) + ) indexer = list( get_indexer( @@ -625,7 +661,7 @@ async def _encode_shard_dict( template = buffer_prototype.buffer.create_zero_length() chunk_start = 0 - for chunk_coords in morton_order_iter(chunks_per_shard): + for chunk_coords in self._subchunk_iter(chunks_per_shard): value = map.get(chunk_coords) if value is None: continue diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index d7cbeb5bdb..594957c40d 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -13,12 +13,15 @@ from zarr.abc.store import Store from zarr.codecs import ( BloscCodec, + BytesCodec, + Crc32cCodec, ShardingCodec, ShardingCodecIndexLocation, TransposeCodec, ) +from zarr.codecs.sharding import SubchunkWriteOrder, _ShardReader from zarr.core.buffer import NDArrayLike, default_buffer_prototype -from zarr.storage import StorePath, ZipStore +from zarr.storage import MemoryStore, StorePath, ZipStore from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim @@ -555,3 +558,90 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: s3 = sharded[0:5, 1, 0:3] assert c3.shape == s3.shape == (5, 3) # type: ignore[union-attr] np.testing.assert_array_equal(c3, s3) + + +@pytest.mark.parametrize( + "subchunk_write_order", + list(SubchunkWriteOrder), +) +async def test_encoded_subchunk_write_order( + subchunk_write_order: SubchunkWriteOrder, +) -> None: + """Subchunks must be physically laid out in the shard in the order specified by + ``subchunk_write_order``. We verify this by decoding the shard index and sorting + the chunk coordinates by their byte offset.""" + # Use a non-square chunks_per_shard so all three orderings are distinguishable. + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, strict=True)) + + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + ) + store = MemoryStore() + arr = zarr.create_array( + StorePath(store), + shape=shard_shape, + dtype="uint8", + chunks=shard_shape, + serializer=codec, + filters=None, + compressors=None, + fill_value=0, + ) + + arr[:] = np.arange(np.prod(shard_shape), dtype="uint8").reshape(shard_shape) + + shard_buf = await store.get("c/0/0", prototype=default_buffer_prototype()) + if shard_buf is None: + raise RuntimeError("data write failed") + index = (await _ShardReader.from_bytes(shard_buf, codec, chunks_per_shard)).index + offset_to_coord: dict[int, tuple[int, ...]] = dict( + zip( + index.get_chunk_slices_vectorized(np.array(list(np.ndindex(chunks_per_shard))))[ + 0 + ], # start + list(np.ndindex(chunks_per_shard)), # coord + strict=True, + ) + ) + + # The physical write order is recovered by sorting coordinates by start offset. + actual_order = [coord for _, coord in sorted(offset_to_coord.items())] + expected_order = list(codec._subchunk_iter(chunks_per_shard)) + assert (actual_order == expected_order) == ( + subchunk_write_order != SubchunkWriteOrder.unordered + ) + + +@pytest.mark.parametrize( + "subchunk_write_order", + list(SubchunkWriteOrder), +) +def test_subchunk_write_order_roundtrip(subchunk_write_order: SubchunkWriteOrder) -> None: + """Data written with any ``subchunk_write_order`` must round-trip correctly.""" + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, strict=True)) + data = np.arange(np.prod(shard_shape), dtype="uint16").reshape(shard_shape) + + arr = zarr.create_array( + StorePath(MemoryStore()), + shape=shard_shape, + dtype=data.dtype, + chunks=shard_shape, + serializer=ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + subchunk_write_order=subchunk_write_order, + ), + filters=None, + compressors=None, + fill_value=0, + ) + arr[:] = data + np.testing.assert_array_equal(arr[:], data) From 2e3667963df918460af69e8b426f94022131a373 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 11:56:09 +0100 Subject: [PATCH 2/9] chore: export `SubchunkWriteOrder` --- src/zarr/codecs/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index 4c621290e7..8dc67737e6 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -27,7 +27,7 @@ Zlib, Zstd, ) -from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation, SubchunkWriteOrder from zarr.codecs.transpose import TransposeCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec @@ -43,6 +43,7 @@ "GzipCodec", "ShardingCodec", "ShardingCodecIndexLocation", + "SubchunkWriteOrder", "TransposeCodec", "VLenBytesCodec", "VLenUTF8Codec", From c6498b2ef8d3a1f198404944345797cd1568e7e2 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 12:04:07 +0100 Subject: [PATCH 3/9] chore: docs --- docs/user-guide/performance.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index 0e0fa3cd55..b9fda0f79c 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -113,6 +113,13 @@ bytes within chunks of an array may improve the compression ratio, depending on the structure of the data, the compression algorithm used, and which compression filters (e.g., byte-shuffle) have been applied. +### Subchunk memory layout + +The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec` using the `SubchunkWriteOrder` enum or a corresponding string. + +By default [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order provides good spatial locality however [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order), for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired. The options are `lexicographic`, `morton`, `unordered` (i.e., random), and `colexicographic`. + + ### Empty chunks It is possible to configure how Zarr handles the storage of chunks that are "empty" From 58e071c9d3c506a06d1e0e5055b7243b6c03e1f5 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 12:05:30 +0100 Subject: [PATCH 4/9] chore: relnote --- changes/3826.feat.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3826.feat.md diff --git a/changes/3826.feat.md b/changes/3826.feat.md new file mode 100644 index 0000000000..41cc555a92 --- /dev/null +++ b/changes/3826.feat.md @@ -0,0 +1 @@ +Added a `subchunk_write_order` option to `ShardingCodec` to allow for `morton`, `unordered`, `lexicographic`, and `colexicographic` subchunk orderings. \ No newline at end of file From 11b94c0943bae76913e534206b52a13ca3852225 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 12:27:00 +0100 Subject: [PATCH 5/9] rename --- changes/{3826.feat.md => 3826.feature.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changes/{3826.feat.md => 3826.feature.md} (100%) diff --git a/changes/3826.feat.md b/changes/3826.feature.md similarity index 100% rename from changes/3826.feat.md rename to changes/3826.feature.md From b0c622d072f9847c60580cd7a646bea95ab445dc Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 24 Mar 2026 15:32:07 +0100 Subject: [PATCH 6/9] refactor: no enums --- src/zarr/codecs/sharding.py | 47 +++++++++++++----------------- tests/test_codecs/test_sharding.py | 10 +++---- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 0ab9fb5990..ff87d41a02 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -6,7 +6,7 @@ from enum import Enum from functools import lru_cache from operator import itemgetter -from typing import TYPE_CHECKING, Any, NamedTuple, cast +from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast import numpy as np import numpy.typing as npt @@ -60,7 +60,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Self + from typing import Final, Self from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -79,27 +79,19 @@ class ShardingCodecIndexLocation(Enum): end = "end" -class SubchunkWriteOrder(Enum): - """ - Enum for the order of the chunks within a shard. - - unordered is implemented via `random.shuffle` over the lexicographic order. - """ - - morton = "morton" - unordered = "unordered" - lexicographic = "lexicographic" - colexicographic = "colexicographic" +SubchunkWriteOrder = Literal["morton", "unordered", "lexicographic", "colexicographic"] +SUBCHUNK_WRITE_ORDER: Final[tuple[str, str, str, str]] = ( + "morton", + "unordered", + "lexicographic", + "colexicographic", +) def parse_index_location(data: object) -> ShardingCodecIndexLocation: return parse_enum(data, ShardingCodecIndexLocation) -def parse_subchunk_write_order(data: object) -> SubchunkWriteOrder: - return parse_enum(data, SubchunkWriteOrder) - - @dataclass(frozen=True) class _ShardingByteGetter(ByteGetter): shard_dict: ShardMapping @@ -324,7 +316,7 @@ class ShardingCodec( codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end - subchunk_write_order: SubchunkWriteOrder = SubchunkWriteOrder.morton + subchunk_write_order: SubchunkWriteOrder = "morton" def __init__( self, @@ -333,19 +325,22 @@ def __init__( codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, - subchunk_write_order: SubchunkWriteOrder | str = SubchunkWriteOrder.morton, + subchunk_write_order: SubchunkWriteOrder = "morton", ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) index_codecs_parsed = parse_codecs(index_codecs) index_location_parsed = parse_index_location(index_location) - subchunk_write_order_parsed = parse_subchunk_write_order(subchunk_write_order) + if subchunk_write_order not in SUBCHUNK_WRITE_ORDER: + raise ValueError( + f"Unrecognized subchunk write order: {subchunk_write_order}. Only {SUBCHUNK_WRITE_ORDER} are allowed." + ) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) - object.__setattr__(self, "subchunk_write_order", subchunk_write_order_parsed) + object.__setattr__(self, "subchunk_write_order", subchunk_write_order) # Use instance-local lru_cache to avoid memory leaks @@ -547,13 +542,13 @@ async def _decode_partial_single( def _subchunk_order_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tuple[int, ...]]: match self.subchunk_write_order: - case SubchunkWriteOrder.morton: + case "morton": subchunk_iter = morton_order_iter(chunks_per_shard) - case SubchunkWriteOrder.lexicographic: + case "lexicographic": subchunk_iter = np.ndindex(chunks_per_shard) - case SubchunkWriteOrder.colexicographic: + case "colexicographic": subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1])) - case SubchunkWriteOrder.unordered: + case "unordered": subchunk_list = list(np.ndindex(chunks_per_shard)) random.shuffle(subchunk_list) subchunk_iter = iter(subchunk_list) @@ -561,7 +556,7 @@ def _subchunk_order_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tu def _subchunk_order_vectorized(self, chunks_per_shard: tuple[int, ...]) -> npt.NDArray[np.intp]: match self.subchunk_write_order: - case SubchunkWriteOrder.morton: + case "morton": subchunk_order_vectorized = _morton_order(chunks_per_shard) case _: subchunk_order_vectorized = np.fromiter( diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 0bbdffb0bb..f6681a0ecd 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -1,6 +1,6 @@ import pickle import re -from typing import Any +from typing import Any, get_args import numpy as np import numpy.typing as npt @@ -562,7 +562,7 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: @pytest.mark.parametrize( "subchunk_write_order", - list(SubchunkWriteOrder), + get_args(SubchunkWriteOrder), ) async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None: """Subchunks must be physically laid out in the shard in the order specified by @@ -611,14 +611,12 @@ async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteO # The physical write order is recovered by sorting coordinates by start offset. actual_order = [coord for _, coord in sorted(offset_to_coord.items())] expected_order = list(codec._subchunk_order_iter(chunks_per_shard)) - assert (actual_order == expected_order) == ( - subchunk_write_order != SubchunkWriteOrder.unordered - ) + assert (actual_order == expected_order) == (subchunk_write_order != "unordered") @pytest.mark.parametrize( "subchunk_write_order", - list(SubchunkWriteOrder), + get_args(SubchunkWriteOrder), ) @pytest.mark.parametrize("do_partial", [True, False], ids=["partial", "complete"]) def test_subchunk_write_order_roundtrip( From be7ac83753fe8367af2dd32e4d52961d727b2c1f Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Fri, 27 Mar 2026 09:30:49 +0100 Subject: [PATCH 7/9] Update docs/user-guide/performance.md Co-authored-by: Davis Bennett --- docs/user-guide/performance.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index b9fda0f79c..2a083af6bf 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -115,7 +115,7 @@ filters (e.g., byte-shuffle) have been applied. ### Subchunk memory layout -The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec` using the `SubchunkWriteOrder` enum or a corresponding string. +The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec`. That parameter is a string which must be one of `["morton", "lexicographic", "colexicographic", "unordered"]`. By default [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order provides good spatial locality however [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order), for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired. The options are `lexicographic`, `morton`, `unordered` (i.e., random), and `colexicographic`. From 7b663ff155049dbf6f6cf1c2da740f2125375762 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 27 Mar 2026 17:34:15 +0100 Subject: [PATCH 8/9] feat: deterministic but random order --- src/zarr/codecs/sharding.py | 27 ++++------ tests/test_codecs/test_sharding.py | 86 ++++++++++++++++++++++-------- 2 files changed, 73 insertions(+), 40 deletions(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index ff87d41a02..562c39b88d 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -1,6 +1,5 @@ from __future__ import annotations -import random from collections.abc import Iterable, Mapping, MutableMapping, Sequence from dataclasses import dataclass, replace from enum import Enum @@ -48,7 +47,6 @@ BasicIndexer, ChunkProjection, SelectorTuple, - _morton_order, _morton_order_keys, c_order_iter, get_indexer, @@ -315,6 +313,7 @@ class ShardingCodec( chunk_shape: tuple[int, ...] codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] + rng: np.random.Generator | None index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end subchunk_write_order: SubchunkWriteOrder = "morton" @@ -326,6 +325,7 @@ def __init__( index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, subchunk_write_order: SubchunkWriteOrder = "morton", + rng: np.random.Generator | None = None, ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) @@ -341,6 +341,7 @@ def __init__( object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) object.__setattr__(self, "subchunk_write_order", subchunk_write_order) + object.__setattr__(self, "rng", rng) # Use instance-local lru_cache to avoid memory leaks @@ -353,7 +354,7 @@ def __init__( # todo: typedict return type def __getstate__(self) -> dict[str, Any]: - return self.to_dict() + return {"rng": self.rng, **self.to_dict()} def __setstate__(self, state: dict[str, Any]) -> None: config = state["configuration"] @@ -361,6 +362,7 @@ def __setstate__(self, state: dict[str, Any]) -> None: object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + object.__setattr__(self, "rng", state["rng"]) # Use instance-local lru_cache to avoid memory leaks # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) @@ -550,21 +552,12 @@ def _subchunk_order_iter(self, chunks_per_shard: tuple[int, ...]) -> Iterable[tu subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1])) case "unordered": subchunk_list = list(np.ndindex(chunks_per_shard)) - random.shuffle(subchunk_list) + (self.rng if self.rng is not None else np.random.default_rng()).shuffle( + subchunk_list + ) subchunk_iter = iter(subchunk_list) return subchunk_iter - def _subchunk_order_vectorized(self, chunks_per_shard: tuple[int, ...]) -> npt.NDArray[np.intp]: - match self.subchunk_write_order: - case "morton": - subchunk_order_vectorized = _morton_order(chunks_per_shard) - case _: - subchunk_order_vectorized = np.fromiter( - self._subchunk_order_iter(chunks_per_shard), - dtype=np.dtype((int, len(chunks_per_shard))), - ) - return subchunk_order_vectorized - async def _encode_single( self, shard_array: NDBuffer, @@ -623,7 +616,7 @@ async def _encode_partial_single( ) if self._is_complete_shard_write(indexer, chunks_per_shard): - shard_dict = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard)) + shard_dict = dict.fromkeys(np.ndindex(chunks_per_shard)) else: shard_reader = await self._load_full_shard_maybe( byte_getter=byte_setter, @@ -633,7 +626,7 @@ async def _encode_partial_single( shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) # Use vectorized lookup for better performance shard_dict = shard_reader.to_dict_vectorized( - self._subchunk_order_vectorized(chunks_per_shard) + np.array(list(np.ndindex(chunks_per_shard))) ) await self.codec_pipeline.write( diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index f6681a0ecd..062c2d4f1c 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -560,26 +560,10 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: np.testing.assert_array_equal(c3, s3) -@pytest.mark.parametrize( - "subchunk_write_order", - get_args(SubchunkWriteOrder), -) -async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None: - """Subchunks must be physically laid out in the shard in the order specified by - ``subchunk_write_order``. We verify this by decoding the shard index and sorting - the chunk coordinates by their byte offset.""" - # Use a non-square chunks_per_shard so all three orderings are distinguishable. - chunks_per_shard = (3, 2) - chunk_shape = (4, 4) - shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, strict=True)) - - codec = ShardingCodec( - chunk_shape=chunk_shape, - codecs=[BytesCodec()], - index_codecs=[BytesCodec(), Crc32cCodec()], - index_location=ShardingCodecIndexLocation.end, - subchunk_write_order=subchunk_write_order, - ) +async def stored_data_and_get_order( + codec: ShardingCodec, chunks_per_shard: tuple[int, ...] +) -> list[tuple[int, ...]]: + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, codec.chunk_shape, strict=True)) store = MemoryStore() arr = zarr.create_array( StorePath(store), @@ -609,9 +593,65 @@ async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteO ) # The physical write order is recovered by sorting coordinates by start offset. - actual_order = [coord for _, coord in sorted(offset_to_coord.items())] - expected_order = list(codec._subchunk_order_iter(chunks_per_shard)) - assert (actual_order == expected_order) == (subchunk_write_order != "unordered") + return [coord for _, coord in sorted(offset_to_coord.items())] + + +@pytest.mark.parametrize( + "subchunk_write_order", + get_args(SubchunkWriteOrder), +) +async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None: + """Subchunks must be physically laid out in the shard in the order specified by + ``subchunk_write_order``. We verify this by decoding the shard index and sorting + the chunk coordinates by their byte offset.""" + # Use a non-square chunks_per_shard so all three orderings are distinguishable. + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + ) + + actual_order = await stored_data_and_get_order(codec, chunks_per_shard) + if subchunk_write_order != "unordered": + expected_order = list(codec._subchunk_order_iter(chunks_per_shard)) + assert actual_order == expected_order + else: + same_order_same_seed = list( + ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + )._subchunk_order_iter(chunks_per_shard) + ) + assert actual_order == same_order_same_seed + + +async def test_unordered_can_be_seeded() -> None: + orders = [] + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + for _ in range(4): + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order="unordered", + rng=np.random.default_rng(seed=seed), + ) + # The physical write order is recovered by sorting coordinates by start offset. + orders.append(await stored_data_and_get_order(codec, chunks_per_shard)) + assert all(orders[0] == o for o in orders) @pytest.mark.parametrize( From 027c4699bb543a7c08d0499ed3981691cea27114 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 27 Mar 2026 18:58:01 +0100 Subject: [PATCH 9/9] fix: make vectorized fetching less reliant on matching order --- src/zarr/codecs/sharding.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 562c39b88d..01b0dc0d3d 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -47,7 +47,6 @@ BasicIndexer, ChunkProjection, SelectorTuple, - _morton_order_keys, c_order_iter, get_indexer, morton_order_iter, @@ -235,6 +234,7 @@ def create_empty(cls, chunks_per_shard: tuple[int, ...]) -> _ShardIndex: class _ShardReader(ShardMapping): buf: Buffer index: _ShardIndex + order: SubchunkWriteOrder @classmethod async def from_bytes( @@ -291,15 +291,13 @@ def to_dict_vectorized( dict mapping chunk coordinate tuples to Buffer or None """ starts, ends, valid = self.index.get_chunk_slices_vectorized(chunk_coords_array) - chunks_per_shard = tuple(self.index.offsets_and_lengths.shape[:-1]) - chunk_coords_keys = _morton_order_keys(chunks_per_shard) result: dict[tuple[int, ...], Buffer | None] = {} - for i, coords in enumerate(chunk_coords_keys): + for i, coords in enumerate(chunk_coords_array): if valid[i]: - result[coords] = self.buf[int(starts[i]) : int(ends[i])] + result[tuple(coords.ravel())] = self.buf[int(starts[i]) : int(ends[i])] else: - result[coords] = None + result[tuple(coords.ravel())] = None return result @@ -575,7 +573,11 @@ async def _encode_single( chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) ) - shard_builder = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard)) + shard_builder = dict.fromkeys(np.array(list(np.ndindex(chunks_per_shard)))) + assert ( + shard_builder.keys() + == dict.fromkeys(self._subchunk_order_iter(chunks_per_shard)).keys() + ) await self.codec_pipeline.write( [