Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ pyarrow = { version = "*", index = "https://pypi.anaconda.org/scientific-python-

dask = { git = "https://github.com/dask/dask" }
distributed = { git = "https://github.com/dask/distributed" }
zarr = { git = "https://github.com/zarr-developers/zarr-python" }
zarr = { git = "https://github.com/jhamman/zarr-python", branch = "feature/rectilinear-chunk-grid" }
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert before merging:

Suggested change
zarr = { git = "https://github.com/jhamman/zarr-python", branch = "feature/rectilinear-chunk-grid" }
zarr = { git = "https://github.com/zarr-developers/zarr-python" }

numcodecs = { git = "https://github.com/zarr-developers/numcodecs" }
cftime = { git = "https://github.com/Unidata/cftime" }
# packaging = { git = "https://github.com/pypa/packaging"} #? Pixi warns if this is enabled
Expand Down
35 changes: 26 additions & 9 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@
from xarray.core.datatree import DataTree
from xarray.core.types import ZarrArray, ZarrGroup

try:
from zarr import RectilinearChunks, RegularChunks # noqa: F401

has_variable_chunk_support = True
except ImportError:
has_variable_chunk_support = False
Comment on lines +48 to +53
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
try:
from zarr import RectilinearChunks, RegularChunks # noqa: F401
has_variable_chunk_support = True
except ImportError:
has_variable_chunk_support = False
try:
from zarr.core.chunk_grids import RegularChunkGrid
has_chunk_grid_support = True
except ImportError:
has_chunk_grid_support = False

This is used for the variable chunk grid support later on; see the note there about making it public API.



def _get_mappers(*, storage_options, store, chunk_store):
# expand str and path-like arguments
Expand Down Expand Up @@ -280,7 +287,7 @@ async def async_getitem(self, key):
)


def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format):
"""
Given encoding chunks (possibly None or []) and variable chunks
(possibly None or []).
Expand All @@ -302,18 +309,24 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name):
# while dask chunks can be variable sized
# https://dask.pydata.org/en/latest/array-design.html#chunks
if var_chunks and not enc_chunks:
if zarr_format == 3 and has_variable_chunk_support:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if zarr_format == 3 and has_variable_chunk_support:
if zarr_format == 3 and has_chunk_grid_support:

return tuple(var_chunks)

if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes except for final chunk. "
"Zarr v2 requires uniform chunk sizes except for final chunk. "
f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
"Consider rechunking using `chunk()`."
"Consider rechunking using `chunk()`, or switching to the "
"zarr v3 format with zarr-python>=3.2."
Copy link
Copy Markdown
Collaborator Author

@keewis keewis Jan 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still struggle with accurately expressing the prerequisites for rectilinear chunk support. Maybe this is fine, but we could also ask for "rectilinear chunk support"?

Suggested change
"zarr v3 format with zarr-python>=3.2."
"zarr v3 format with enabled rectilinear chunk support."

)
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
raise ValueError(
"Final chunk of Zarr array must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}."
"Consider either rechunking using `chunk()` or instead deleting "
"or modifying `encoding['chunks']`."
"Final chunk of a Zarr v2 array or a Zarr v3 array without the "
"rectilinear chunks extension must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask "
f"chunks {var_chunks!r}. "
"Consider switching to Zarr v3 with the rectilinear chunks extension, "
"rechunking using `chunk()` or deleting or modifying `encoding['chunks']`."
)
# return the first chunk for each dimension
return tuple(chunk[0] for chunk in var_chunks)
Expand Down Expand Up @@ -476,6 +489,7 @@ def extract_zarr_variable_encoding(
var_chunks=variable.chunks,
ndim=variable.ndim,
name=name,
zarr_format=zarr_format,
)
if _zarr_v3() and chunks is None:
chunks = "auto"
Expand Down Expand Up @@ -854,9 +868,12 @@ def open_store_variable(self, name):
)
attributes = dict(attributes)

chunks = tuple(zarr_array.chunks)
preferred_chunks = dict(zip(dimensions, chunks, strict=True))
Comment on lines +871 to +872
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
chunks = tuple(zarr_array.chunks)
preferred_chunks = dict(zip(dimensions, chunks, strict=True))
chunk_grid = zarr_array.metadata.chunk_grid
if has_chunk_grid_support and isinstance(chunk_grid, RegularChunkGrid):
chunks = chunk_grid.chunk_shape
preferred_chunks = dict(zip(dimensions, chunks, strict=True))
elif has_chunk_grid_support:
# RectilinearChunkGrid or other non-regular grids — store the
# full chunk_grid and skip preferred_chunks since there's no
# single chunk size per dimension
chunks = chunk_grid
preferred_chunks = {}
else:
# Fallback for older zarr-python without chunk_grid support
chunks = tuple(zarr_array.chunks)
preferred_chunks = dict(zip(dimensions, chunks, strict=True))

This suggestion adds support for RectilinearChunkGrids, which do not implement .chunks. It relies on private API, so it is probably worth advocating for Regular/RectilinearChunkGrid to be made public after the main PR lands.


encoding = {
"chunks": zarr_array.chunks,
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)),
"chunks": chunks,
"preferred_chunks": preferred_chunks,
}

if _zarr_v3():
Expand Down
Loading