Skip to content

Commit 9d15e65

Browse files
committed
Merge remote-tracking branch 'origin/main' into codex/issue-169-log-capture-export
2 parents b47e06f + df189d9 commit 9d15e65

File tree

7 files changed: 81 additions and 16 deletions.

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
77

88
## Unreleased
99

10+
- [#822](https://github.com/pytask-dev/pytask/pull/822) fixes unstable signatures
11+
for remote `UPath`-backed `PathNode`s and `PickleNode`s so unchanged remote inputs
12+
are no longer reported as missing from the state database on subsequent runs.
1013
- [#820](https://github.com/pytask-dev/pytask/pull/820) fixes collection and node
1114
display for remote `UPath`-backed nodes, while preserving correct handling of local
1215
`file://` and `local://` `UPath`s across platforms.

src/_pytask/_hashlib.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
from __future__ import annotations
22

33
import hashlib
4+
import os
45
import sys
56
from contextlib import suppress
67
from pathlib import Path
78
from typing import Any
89

10+
from upath import UPath
11+
12+
_LOCAL_UPATH_PROTOCOLS = frozenset(("", "file", "local"))
13+
_WINDOWS_DRIVE_PREFIX_LENGTH = 3
14+
915

1016
if sys.version_info >= (3, 11): # pragma: no cover
1117
from hashlib import file_digest
@@ -227,8 +233,22 @@ def hash_value(value: Any) -> int | str:
227233
return 0xFCA86420
228234
if isinstance(value, (tuple, list)):
229235
value = "".join(str(hash_value(i)) for i in value)
230-
if isinstance(value, Path):
231-
value = str(value)
236+
if isinstance(value, UPath):
237+
if value.protocol in _LOCAL_UPATH_PROTOCOLS:
238+
local_path = value.path
239+
if (
240+
sys.platform == "win32"
241+
and local_path.startswith("/")
242+
and len(local_path) >= _WINDOWS_DRIVE_PREFIX_LENGTH
243+
and local_path[1].isalpha()
244+
and local_path[2] == ":"
245+
):
246+
local_path = local_path[1:]
247+
value = os.fspath(Path(local_path))
248+
else:
249+
value = str(value)
250+
elif isinstance(value, os.PathLike):
251+
value = os.fspath(value)
232252
if isinstance(value, str):
233253
value = value.encode()
234254
if isinstance(value, bytes):

tests/test_collect.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import cloudpickle
99
import pytest
10+
import upath
1011

1112
from _pytask.collect import _find_shortest_uniquely_identifiable_name_for_tasks
1213
from _pytask.collect import pytask_collect_node
@@ -196,8 +197,6 @@ def test_pytask_collect_node(session, path, node_info, expected):
196197

197198

198199
def test_pytask_collect_remote_path_node_keeps_uri_name():
199-
upath = pytest.importorskip("upath")
200-
201200
session = Session.from_config(
202201
{"check_casing_of_paths": False, "paths": (Path.cwd(),), "root": Path.cwd()}
203202
)
@@ -220,8 +219,6 @@ def test_pytask_collect_remote_path_node_keeps_uri_name():
220219

221220
@pytest.mark.parametrize("protocol", ["file", "local"])
222221
def test_pytask_collect_local_upath_protocol_node_is_shortened(tmp_path, protocol):
223-
upath = pytest.importorskip("upath")
224-
225222
session = Session.from_config(
226223
{"check_casing_of_paths": False, "paths": (tmp_path,), "root": tmp_path}
227224
)

tests/test_collect_command.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,6 @@ def test_task_name_is_shortened(runner, tmp_path):
401401

402402

403403
def test_collect_task_with_remote_upath_node(runner, tmp_path):
404-
pytest.importorskip("upath")
405-
406404
source = """
407405
from pathlib import Path
408406
from typing import Annotated
@@ -427,8 +425,6 @@ def task_example(
427425

428426
@pytest.mark.parametrize("protocol", ["file", "local"])
429427
def test_collect_task_with_local_upath_protocol_node(runner, tmp_path, protocol):
430-
pytest.importorskip("upath")
431-
432428
uri = _make_local_upath_uri(tmp_path / "in.pkl", protocol)
433429

434430
source = f"""

tests/test_hashlib.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
from __future__ import annotations
22

3+
import os
34
from pathlib import Path
45

56
import pytest
7+
import upath
68

79
from _pytask._hashlib import hash_value
810

911

12+
class RemotePathLike(os.PathLike[str]):
13+
def __init__(self, value: str) -> None:
14+
self.value = value
15+
16+
def __fspath__(self) -> str:
17+
return self.value
18+
19+
1020
@pytest.mark.parametrize(
1121
("value", "expected"),
1222
[
@@ -24,8 +34,26 @@
2434
Path("file.py"),
2535
"48b38abeefb3ba2622b6d1534d36c1ffd9b4deebf2cd71e4af8a33723e734ada",
2636
),
37+
(
38+
RemotePathLike("s3://bucket/file.pkl"),
39+
"5bbedd1ab74242143481060b901083e77080661d97003b96e0cbae3a887ebce6",
40+
),
2741
],
2842
)
2943
def test_hash_value(value, expected):
3044
hash_ = hash_value(value)
3145
assert hash_ == expected
46+
47+
48+
def test_hash_value_of_remote_upath():
49+
hash_ = hash_value(upath.UPath("s3://bucket/file.pkl"))
50+
51+
assert hash_ == "5bbedd1ab74242143481060b901083e77080661d97003b96e0cbae3a887ebce6"
52+
53+
54+
@pytest.mark.parametrize("protocol", ["file", "local"])
55+
def test_hash_value_of_local_upath_matches_path(tmp_path, protocol):
56+
path = tmp_path / "file.pkl"
57+
upath_value = upath.UPath(f"{protocol}:///{path.as_posix().lstrip('/')}")
58+
59+
assert hash_value(upath_value) == hash_value(path)

tests/test_nodes.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

3+
import hashlib
34
import pickle
45
import sys
56
from pathlib import Path
7+
from typing import cast
68

79
import cloudpickle
810
import pytest
11+
import upath
912

1013
from pytask import NodeInfo
1114
from pytask import PathNode
@@ -118,6 +121,29 @@ def test_hash_of_pickle_node(tmp_path, value, exists, expected):
118121
assert state is expected
119122

120123

124+
@pytest.mark.parametrize("node_cls", [PathNode, PickleNode])
125+
def test_signature_of_remote_upath_node(node_cls):
126+
node = node_cls(name="test", path=cast("Path", upath.UPath("s3://bucket/file.pkl")))
127+
128+
expected = hashlib.sha256(
129+
b"5bbedd1ab74242143481060b901083e77080661d97003b96e0cbae3a887ebce6"
130+
).hexdigest()
131+
132+
assert node.signature == expected
133+
134+
135+
@pytest.mark.parametrize("node_cls", [PathNode, PickleNode])
136+
@pytest.mark.parametrize("protocol", ["file", "local"])
137+
def test_signature_of_local_upath_node_matches_path(tmp_path, node_cls, protocol):
138+
path = tmp_path / "file.pkl"
139+
upath_value = upath.UPath(f"{protocol}:///{path.as_posix().lstrip('/')}")
140+
141+
local_node = node_cls(name="test", path=path)
142+
upath_node = node_cls(name="test", path=cast("Path", upath_value))
143+
144+
assert upath_node.signature == local_node.signature
145+
146+
121147
@pytest.mark.parametrize(
122148
("node", "protocol", "expected"),
123149
[

tests/test_path.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import Any
1414

1515
import pytest
16+
import upath
1617

1718
from _pytask.path import _insert_missing_modules
1819
from _pytask.path import _module_name_from_path
@@ -118,17 +119,13 @@ def test_find_common_ancestor(path_1, path_2, expectation, expected):
118119

119120

120121
def test_shorten_path_keeps_non_local_uri():
121-
upath = pytest.importorskip("upath")
122-
123122
path = upath.UPath("s3://bucket/file.pkl")
124123

125124
assert shorten_path(path, [Path.cwd()]) == "s3://bucket/file.pkl"
126125

127126

128127
@pytest.mark.parametrize("protocol", ["file", "local"])
129128
def test_shorten_path_treats_local_upath_protocols_as_local(tmp_path, protocol):
130-
upath = pytest.importorskip("upath")
131-
132129
path = upath.UPath(_make_local_upath_uri(tmp_path / "file.pkl", protocol))
133130

134131
assert not is_non_local_path(path)
@@ -137,8 +134,6 @@ def test_shorten_path_treats_local_upath_protocols_as_local(tmp_path, protocol):
137134

138135
@pytest.mark.parametrize("protocol", ["file", "local"])
139136
def test_normalize_local_upath_strips_windows_drive_prefix(monkeypatch, protocol):
140-
upath = pytest.importorskip("upath")
141-
142137
monkeypatch.setattr(sys, "platform", "win32")
143138
path = upath.UPath(f"{protocol}:///C:/tmp/file.pkl")
144139

0 commit comments

Comments
 (0)