Skip to content

Commit d68d904

Browse files
authored
Merge pull request #260 from dunkmann00/performance
Add lazy metadata for module metadata
2 parents d651674 + de21b29 commit d68d904

File tree

3 files changed

+143
-15
lines changed

3 files changed

+143
-15
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ venv.bak/
115115
# VSCode
116116
.vscode/
117117

118+
# Zed
119+
.zed/
120+
118121
.DS_STORE
119122

120123
# emacs

circup/lazy_metadata.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# SPDX-FileCopyrightText: 2025 George Waters
2+
#
3+
# SPDX-License-Identifier: MIT
4+
"""
5+
Class that acts similar to a dictionary, but defers the loading of expensive
6+
data until that data is accessed.
7+
"""
8+
from typing import Any, Callable
9+
10+
11+
class LazyMetadata:
12+
"""
13+
Dictionary like class that stores module metadata. Expensive to load
14+
metadata won't be loaded until it is accessed.
15+
"""
16+
17+
def __init__(
18+
self,
19+
deferred_load: Callable[[], dict[str, Any]],
20+
initial_data: dict[str, Any] | None = None,
21+
):
22+
"""
23+
Initialize a LazyMetadata object by providing a callable and initial
24+
data.
25+
26+
:param deferred_load: A callable that returns a dictionary of metadata.
27+
This is not invoked until a key is accessed that is not available in
28+
:py:attr:`initial_data`.
29+
:param initial_data: A dictionary containing the initial metadata.
30+
"""
31+
self._deferred_load = deferred_load
32+
self.initial_data = initial_data.copy() if initial_data is not None else {}
33+
self._deferred_data: dict[str, Any] | None = None
34+
35+
@property
36+
def deferred_data(self) -> dict[str, Any]:
37+
"""
38+
Lazy load the metadata from :py:attr:`_deferred_load`.
39+
40+
:return: The "expensive" metadata that was loaded from
41+
:py:attr:`_deferred_load`.
42+
"""
43+
if self._deferred_data is None:
44+
self._deferred_data = self._deferred_load()
45+
return self._deferred_data
46+
47+
def __getitem__(self, key: str) -> Any:
48+
"""
49+
Get items via keyed index lookup, like a dictionary.
50+
51+
Keys are first looked for in :py:attr:`initial_data`, if the key isn't
52+
found it is then looked for in :py:attr:`deferred_data`.
53+
54+
:param key: Key to a metadata value.
55+
:return: Metadata value for the given key.
56+
:raises KeyError: If the key cannot be found.
57+
"""
58+
if key in self.initial_data: # pylint: disable=no-else-return
59+
return self.initial_data[key]
60+
elif key in self.deferred_data:
61+
return self.deferred_data[key]
62+
raise KeyError(key)
63+
64+
def __setitem__(self, key: str, item: Any) -> None:
65+
"""
66+
Sets the item under the given key.
67+
68+
The item is set in the :py:attr:`initial_data` dictionary.
69+
70+
:param key: Key to a metadata value.
71+
:param item: Metadata value
72+
"""
73+
self.initial_data[key] = item
74+
75+
def __contains__(self, key: str):
76+
"""
77+
Whether or not a key is present.
78+
79+
This checks both :py:attr:`initial_data` and :py:attr:`deferred_data`
80+
for the key. *Note* this will cause :py:attr:`deferred_data` to load
81+
the deferred data if it is not already.
82+
"""
83+
return key in self.initial_data or key in self.deferred_data
84+
85+
def get(self, key: str, default: Any = None):
86+
"""
87+
Get items via keyed index lookup, like a dictionary.
88+
89+
Also like a dictionary, this method doesn't error if the key is not
90+
found. :param default: is returned if the key is not found.
91+
92+
:param key: Key to a metadata value.
93+
:param default: Default value to return when the key doesn't exist.
94+
:return: Metadata value for the given key.
95+
"""
96+
if key in self:
97+
return self[key]
98+
return default
99+
100+
def __repr__(self) -> str:
101+
"""
102+
Helps with log files.
103+
104+
:return: A repr of a dictionary containing the metadata's values.
105+
"""
106+
return repr(
107+
{
108+
"initial_data": self.initial_data,
109+
"deferred_data": self._deferred_data
110+
if self._deferred_data is not None
111+
else "<Not Loaded>",
112+
}
113+
)

circup/shared.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import appdirs
1515
import requests
1616

17+
from circup.lazy_metadata import LazyMetadata
18+
1719
#: Version identifier for a bad MPY file format
1820
BAD_FILE_FORMAT = "Invalid"
1921

@@ -51,7 +53,7 @@
5153
BOARDLESS_COMMANDS = ["show", "bundle-add", "bundle-remove", "bundle-show"]
5254

5355

54-
def _get_modules_file(path, logger):
56+
def _get_modules_file(path, logger): # pylint: disable=too-many-locals
5557
"""
5658
Get a dictionary containing metadata about all the Python modules found in
5759
the referenced file system path.
@@ -71,8 +73,10 @@ def _get_modules_file(path, logger):
7173
]
7274
single_file_mods = single_file_py_mods + single_file_mpy_mods
7375
for sfm in [f for f in single_file_mods if not os.path.basename(f).startswith(".")]:
74-
metadata = extract_metadata(sfm, logger)
75-
metadata["path"] = sfm
76+
default_metadata = {"path": sfm, "mpy": sfm.endswith(".mpy")}
77+
metadata = LazyMetadata(
78+
lambda sfm=sfm: extract_metadata(sfm, logger), default_metadata
79+
)
7680
result[os.path.basename(sfm).replace(".py", "").replace(".mpy", "")] = metadata
7781
for package_path in package_dir_mods:
7882
name = os.path.basename(os.path.dirname(package_path))
@@ -81,19 +85,27 @@ def _get_modules_file(path, logger):
8185
all_files = py_files + mpy_files
8286
# put __init__ first if any, assumed to have the version number
8387
all_files.sort()
88+
89+
def get_metadata(all_files=all_files): # capture all_files
90+
selected_metadata = {}
91+
# explore all the submodules to detect bad ones
92+
for source in [
93+
f for f in all_files if not os.path.basename(f).startswith(".")
94+
]:
95+
metadata = extract_metadata(source, logger)
96+
if "__version__" in metadata:
97+
# don't replace metadata if already found
98+
if "__version__" not in selected_metadata:
99+
selected_metadata = metadata
100+
# break now if any of the submodules has a bad format
101+
if metadata["__version__"] == BAD_FILE_FORMAT:
102+
break
103+
return selected_metadata
104+
84105
# default value
85-
result[name] = {"path": package_path, "mpy": bool(mpy_files)}
86-
# explore all the submodules to detect bad ones
87-
for source in [f for f in all_files if not os.path.basename(f).startswith(".")]:
88-
metadata = extract_metadata(source, logger)
89-
if "__version__" in metadata:
90-
# don't replace metadata if already found
91-
if "__version__" not in result[name]:
92-
metadata["path"] = package_path
93-
result[name] = metadata
94-
# break now if any of the submodules has a bad format
95-
if metadata["__version__"] == BAD_FILE_FORMAT:
96-
break
106+
default_metadata = {"path": package_path, "mpy": bool(mpy_files)}
107+
metadata = LazyMetadata(get_metadata, default_metadata)
108+
result[name] = metadata
97109
return result
98110

99111

0 commit comments

Comments
 (0)