def state_dict(self) -> dict:
    """
    Return the attributes of this BundleAlgo that are persisted on serialization.

    Returns:
        A dictionary mapping attribute names to their current values. The key set
        is also the set of attributes that ``load_state_dict`` is willing to restore.

    Note:
        ``template_path`` is deliberately left out: it is determined dynamically at
        load time, based on which path successfully imports the Algo class.
    """
    return {
        "data_stats_files": self.data_stats_files,
        "data_list_file": self.data_list_file,
        "mlflow_tracking_uri": self.mlflow_tracking_uri,
        "mlflow_experiment_name": self.mlflow_experiment_name,
        "output_path": self.output_path,
        "name": self.name,
        "best_metric": self.best_metric,
        "fill_records": self.fill_records,
        "device_setting": self.device_setting,
    }


def load_state_dict(self, state: dict) -> None:
    """
    Restore state from a dictionary produced by ``state_dict``.

    Only keys that ``state_dict`` itself emits are applied. The dictionary usually
    comes from a file on disk, so blindly calling ``setattr`` for every key would let
    a crafted or stale file overwrite methods (``train``, ``get_output_path``) or
    ``__dict__`` on the instance. Unknown keys are skipped and reported in the log.

    Args:
        state: A dictionary containing the state to restore.
    """
    # Local import keeps this block self-contained; only the warning path needs it.
    import logging

    # Ask state_dict() for the accepted keys so a subclass that extends state_dict()
    # automatically extends what can be restored.
    accepted = set(self.state_dict())
    for key, value in state.items():
        if key not in accepted:
            logging.getLogger(__name__).warning(
                "Ignoring unexpected key '%s' while restoring %s state.", key, type(self).__name__
            )
            continue
        setattr(self, key, value)
The files in the ``output_path`` - may contain scripts that define the algo, configuration files, and pickle files that save the internal states + may contain scripts that define the algo, configuration files, and JSON files that save the internal states of the algo before/after the training. Compared to the BundleGen class, HPOGen generates Algo on-the-fly, so training and algo generation may be executed alternatively and take a long time to finish the generation process. @@ -72,7 +72,7 @@ class NNIGen(HPOGen): Args: algo: an Algo object (e.g. BundleAlgo) with defined methods: ``get_output_path`` and train - and supports saving to and loading from pickle files via ``algo_from_pickle`` and ``algo_to_pickle``. + and supports saving to and loading via ``algo_from_json`` and ``algo_to_json``. params: a set of parameter to override the algo if override is supported by Algo subclass. Examples:: @@ -81,16 +81,16 @@ class NNIGen(HPOGen): ├── algorithm_templates │ └── unet ├── unet_0 - │ ├── algo_object.pkl + │ ├── algo_object.json │ ├── configs │ └── scripts ├── unet_0_learning_rate_0.01 - │ ├── algo_object.pkl + │ ├── algo_object.json │ ├── configs │ ├── model_fold0 │ └── scripts └── unet_0_learning_rate_0.1 - ├── algo_object.pkl + ├── algo_object.json ├── configs ├── model_fold0 └── scripts @@ -129,10 +129,10 @@ def __init__(self, algo: Algo | None = None, params: dict | None = None): else: self.algo = algo - self.obj_filename = algo_to_pickle(self.algo, template_path=self.algo.template_path) + self.obj_filename = algo_to_json(self.algo, template_path=self.algo.template_path) def get_obj_filename(self): - """Return the filename of the dumped pickle algo object.""" + """Return the filename of the dumped algo object.""" return self.obj_filename def print_bundle_algo_instruction(self): @@ -190,7 +190,7 @@ def generate(self, output_folder: str = ".") -> None: task_id = self.get_task_id() task_prefix = os.path.basename(self.algo.get_output_path()) write_path = 
os.path.join(output_folder, task_prefix + task_id) - self.obj_filename = os.path.join(write_path, "algo_object.pkl") + self.obj_filename = os.path.join(write_path, "algo_object.json") if isinstance(self.algo, BundleAlgo): self.algo.export_to_disk( @@ -214,7 +214,7 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P The python interface for NNI to run. Args: - obj_filename: the pickle-exported Algo object. + obj_filename: the serialized Algo object. output_folder: the root path of the algorithms templates. template_path: the algorithm_template. It must contain algo.py in the follow path: ``{algorithm_templates_dir}/{network}/scripts/algo.py`` @@ -222,7 +222,7 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P if not os.path.isfile(obj_filename): raise ValueError(f"{obj_filename} is not found") - self.algo, algo_meta_data = algo_from_pickle(obj_filename, template_path=template_path) + self.algo, algo_meta_data = algo_from_json(obj_filename, template_path=template_path) # step 1 sample hyperparams params = self.get_hyperparameters() @@ -235,7 +235,7 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P acc = self.algo.get_score() algo_meta_data = {str(AlgoKeys.SCORE): acc} - algo_to_pickle(self.algo, template_path=self.algo.template_path, **algo_meta_data) + algo_to_json(self.algo, template_path=self.algo.template_path, **algo_meta_data) self.set_score(acc) @@ -250,7 +250,7 @@ class OptunaGen(HPOGen): Args: algo: an Algo object (e.g. BundleAlgo). The object must at least define two methods: get_output_path and train - and supports saving to and loading from pickle files via ``algo_from_pickle`` and ``algo_to_pickle``. + and supports saving to and loading via ``algo_from_json`` and ``algo_to_json``. params: a set of parameter to override the algo if override is supported by Algo subclass. 
Examples:: @@ -259,16 +259,16 @@ class OptunaGen(HPOGen): ├── algorithm_templates │ └── unet ├── unet_0 - │ ├── algo_object.pkl + │ ├── algo_object.json │ ├── configs │ └── scripts ├── unet_0_learning_rate_0.01 - │ ├── algo_object.pkl + │ ├── algo_object.json │ ├── configs │ ├── model_fold0 │ └── scripts └── unet_0_learning_rate_0.1 - ├── algo_object.pkl + ├── algo_object.json ├── configs ├── model_fold0 └── scripts @@ -296,10 +296,10 @@ def __init__(self, algo: Algo | None = None, params: dict | None = None) -> None else: self.algo = algo - self.obj_filename = algo_to_pickle(self.algo, template_path=self.algo.template_path) + self.obj_filename = algo_to_json(self.algo, template_path=self.algo.template_path) def get_obj_filename(self): - """Return the dumped pickle object of algo.""" + """Return the dumped object of algo.""" return self.obj_filename def get_hyperparameters(self): @@ -329,7 +329,7 @@ def __call__( Callable that Optuna will use to optimize the hyper-parameters Args: - obj_filename: the pickle-exported Algo object. + obj_filename: the serialized Algo object. output_folder: the root path of the algorithms templates. template_path: the algorithm_template. It must contain algo.py in the follow path: ``{algorithm_templates_dir}/{network}/scripts/algo.py`` @@ -364,7 +364,7 @@ def generate(self, output_folder: str = ".") -> None: task_id = self.get_task_id() task_prefix = os.path.basename(self.algo.get_output_path()) write_path = os.path.join(output_folder, task_prefix + task_id) - self.obj_filename = os.path.join(write_path, "algo_object.pkl") + self.obj_filename = os.path.join(write_path, "algo_object.json") if isinstance(self.algo, BundleAlgo): self.algo.export_to_disk(output_folder, task_prefix + task_id, fill_with_datastats=False) @@ -377,7 +377,7 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P The python interface for NNI to run. Args: - obj_filename: the pickle-exported Algo object. 
+ obj_filename: the serialized Algo object. output_folder: the root path of the algorithms templates. template_path: the algorithm_template. It must contain algo.py in the follow path: ``{algorithm_templates_dir}/{network}/scripts/algo.py`` @@ -385,7 +385,7 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P if not os.path.isfile(obj_filename): raise ValueError(f"{obj_filename} is not found") - self.algo, algo_meta_data = algo_from_pickle(obj_filename, template_path=template_path) + self.algo, algo_meta_data = algo_from_json(obj_filename, template_path=template_path) # step 1 sample hyperparams params = self.get_hyperparameters() @@ -397,5 +397,5 @@ def run_algo(self, obj_filename: str, output_folder: str = ".", template_path: P # step 4 report validation acc to controller acc = self.algo.get_score() algo_meta_data = {str(AlgoKeys.SCORE): acc} - algo_to_pickle(self.algo, template_path=self.algo.template_path, **algo_meta_data) + algo_to_json(self.algo, template_path=self.algo.template_path, **algo_meta_data) self.set_score(acc) diff --git a/monai/apps/auto3dseg/utils.py b/monai/apps/auto3dseg/utils.py index 64e1d2ea2a..fbf9dc101c 100644 --- a/monai/apps/auto3dseg/utils.py +++ b/monai/apps/auto3dseg/utils.py @@ -14,7 +14,7 @@ import os from monai.apps.auto3dseg.bundle_gen import BundleAlgo -from monai.auto3dseg import algo_from_pickle, algo_to_pickle +from monai.auto3dseg import algo_from_json, algo_to_json from monai.utils.enums import AlgoKeys __all__ = ["import_bundle_algo_history", "export_bundle_algo_history", "get_name_from_algo_id"] @@ -42,11 +42,18 @@ def import_bundle_algo_history( if not os.path.isdir(write_path): continue - obj_filename = os.path.join(write_path, "algo_object.pkl") - if not os.path.isfile(obj_filename): # saved mode pkl + # Prefer JSON format, fall back to legacy pickle + json_filename = os.path.join(write_path, "algo_object.json") + pkl_filename = os.path.join(write_path, "algo_object.pkl") + + if 
os.path.isfile(json_filename): + obj_filename = json_filename + elif os.path.isfile(pkl_filename): + obj_filename = pkl_filename + else: continue - algo, algo_meta_data = algo_from_pickle(obj_filename, template_path=template_path) + algo, algo_meta_data = algo_from_json(obj_filename, template_path=template_path) best_metric = algo_meta_data.get(AlgoKeys.SCORE, None) if best_metric is None: @@ -57,7 +64,7 @@ def import_bundle_algo_history( is_trained = best_metric is not None - if (only_trained and is_trained) or not only_trained: + if is_trained or not only_trained: history.append( {AlgoKeys.ID: name, AlgoKeys.ALGO: algo, AlgoKeys.SCORE: best_metric, AlgoKeys.IS_TRAINED: is_trained} ) @@ -67,14 +74,14 @@ def import_bundle_algo_history( def export_bundle_algo_history(history: list[dict[str, BundleAlgo]]) -> None: """ - Save all the BundleAlgo in the history to algo_object.pkl in each individual folder + Save all the BundleAlgo in the history to algo_object.json in each individual folder. Args: history: a List of Bundle. 
Typically, the history can be obtained from BundleGen get_history method """ for algo_dict in history: algo = algo_dict[AlgoKeys.ALGO] - algo_to_pickle(algo, template_path=algo.template_path) + algo_to_json(algo, template_path=algo.template_path) def get_name_from_algo_id(id: str) -> str: diff --git a/monai/auto3dseg/__init__.py b/monai/auto3dseg/__init__.py index 4e5d15613b..f35cd332d3 100644 --- a/monai/auto3dseg/__init__.py +++ b/monai/auto3dseg/__init__.py @@ -25,7 +25,9 @@ from .operations import Operations, SampleOperations, SummaryOperations from .seg_summarizer import SegSummarizer from .utils import ( + algo_from_json, algo_from_pickle, + algo_to_json, algo_to_pickle, concat_multikeys_to_dict, concat_val_to_np, diff --git a/monai/auto3dseg/algo_gen.py b/monai/auto3dseg/algo_gen.py index 5ad15c7d7a..fa306e2b6c 100644 --- a/monai/auto3dseg/algo_gen.py +++ b/monai/auto3dseg/algo_gen.py @@ -38,6 +38,30 @@ def get_score(self, *args, **kwargs): def get_output_path(self, *args, **kwargs): """Returns the algo output paths for scripts location""" + def state_dict(self) -> dict: + """ + Return state for serialization. + + Subclasses should override this method to return a dictionary of + attributes that need to be serialized. This follows the PyTorch + convention for state management. + + Returns: + A dictionary containing the state to serialize. + """ + return {} + + def load_state_dict(self, state: dict) -> None: + """ + Restore state from a dictionary. + + Subclasses should override this method to restore their state + from the dictionary returned by state_dict(). + + Args: + state: A dictionary containing the state to restore. 
def _make_json_serializable(value: Any) -> Any:
    """
    Recursively convert ``value`` into an object that ``json.dump`` accepts.

    Conversions:
        - ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned unchanged.
        - numpy booleans become ``bool``; numpy integers and floats become Python
          scalars via ``item()``.
        - numpy arrays and torch tensors become (nested) lists.
        - lists and tuples become lists, converted element by element.
        - dicts are converted value by value. Keys that are not valid JSON object
          keys after conversion (for example tuples) are replaced by ``str(key)``.
        - anything else (``Path`` objects, custom classes, ...) becomes ``str(value)``.

    Args:
        value: the object to convert.

    Returns:
        A structure built only from ``None``, ``str``, ``int``, ``float``, ``bool``,
        ``list`` and ``dict``.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, np.bool_):
        # np.bool_ is neither a Python bool nor an np.integer. Without this branch it
        # reaches the str() fallback and is stored as the always-truthy "True"/"False".
        return bool(value)
    if isinstance(value, (np.integer, np.floating)):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().numpy().tolist()
    if isinstance(value, (list, tuple)):
        return [_make_json_serializable(item) for item in value]
    if isinstance(value, dict):
        converted: dict = {}
        for raw_key, raw_value in value.items():
            key = _make_json_serializable(raw_key)
            if isinstance(key, (list, dict)):
                # json.dump raises TypeError for container keys; keep a readable label.
                key = str(raw_key)
            converted[key] = _make_json_serializable(raw_value)
        return converted
    # Fallback to string representation
    return str(value)
""" - data = {"algo_bytes": pickle.dumps(algo), "template_path": str(template_path)} - pkl_filename = os.path.join(algo.get_output_path(), "algo_object.pkl") - for k, v in algo_meta_data.items(): - data.update({k: v}) - data_bytes = pickle.dumps(data) - with open(pkl_filename, "wb") as f_pi: - f_pi.write(data_bytes) - return pkl_filename + state = {k: _make_json_serializable(v) for k, v in algo.state_dict().items()} + # Build target string for dynamic class instantiation + cls = algo.__class__ + target = f"{cls.__module__}.{cls.__name__}" -def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any: - """ - Import the Algo object from a pickle file. + algo_meta_data = {str(k): _make_json_serializable(v) for k, v in algo_meta_data.items()} + data: dict[str, Any] = { + "_target_": target, + "_state_": state, + "template_path": str(template_path) if template_path else None, + **algo_meta_data, + } - Args: - pkl_filename: the name of the pickle file. - template_path: a folder containing files to instantiate the Algo. Besides the `template_path`, - this function will also attempt to use the `template_path` saved in the pickle file and a directory - named `algorithm_templates` in the parent folder of the folder containing the pickle file. + json_filename = os.path.join(algo.get_output_path(), "algo_object.json") + with open(json_filename, "w", encoding="utf-8") as f: + json.dump(data, f, separators=(",", ":")) - Returns: - algo: the Algo object saved in the pickle file. - algo_meta_data: additional keyword saved in the pickle file, for example, acc/best_metrics. + return json_filename - Raises: - ValueError if the pkl_filename does not contain a dict, or the dict does not contain `algo_bytes`. - ModuleNotFoundError if it is unable to instantiate the Algo class. +def _load_legacy_pickle(pkl_filename: str, template_path: PathLike | None = None) -> Any: + """ + Load an Algo object from a legacy pickle file. 
+ + This is an internal function to support backward compatibility with pickle files. + Gated behind ``MONAI_ALLOW_PICKLE=1`` because unpickling executes arbitrary code. """ + _require_pickle_allowed() + with open(pkl_filename, "rb") as f_pi: data_bytes = f_pi.read() data = pickle.loads(data_bytes) @@ -330,51 +381,149 @@ def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, * algo_template_path = data.pop("template_path", None) template_paths_candidates: list[str] = [] - - if os.path.isdir(str(template_path)): - template_paths_candidates.append(os.path.abspath(str(template_path))) - template_paths_candidates.append(os.path.abspath(os.path.join(str(template_path), ".."))) - - if os.path.isdir(str(algo_template_path)): - template_paths_candidates.append(os.path.abspath(algo_template_path)) - template_paths_candidates.append(os.path.abspath(os.path.join(algo_template_path, ".."))) + _add_path_with_parent(template_paths_candidates, template_path) + _add_path_with_parent(template_paths_candidates, algo_template_path) pkl_dir = os.path.dirname(pkl_filename) - algo_template_path_fuzzy = os.path.join(pkl_dir, "..", "algorithm_templates") - - if os.path.isdir(algo_template_path_fuzzy): - template_paths_candidates.append(os.path.abspath(algo_template_path_fuzzy)) + fuzzy_path = os.path.join(pkl_dir, "..", "algorithm_templates") + if os.path.isdir(fuzzy_path): + template_paths_candidates.append(os.path.abspath(fuzzy_path)) if len(template_paths_candidates) == 0: - # no template_path provided or needed algo = pickle.loads(algo_bytes) algo.template_path = None else: for i, p in enumerate(template_paths_candidates): + path_added = False try: - sys.path.append(p) + if p not in sys.path: + sys.path.insert(0, p) + path_added = True algo = pickle.loads(algo_bytes) break except ModuleNotFoundError as not_found_err: logging.debug(f"Folder {p} doesn't contain the Algo templates for Algo instantiation.") - sys.path.pop() if i == len(template_paths_candidates) - 
def algo_from_json(filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any:
    """
    Import the Algo object from a JSON file (pickle-free serialization).

    Args:
        filename: the name of the saved file (algo_object.json or legacy algo_object.pkl).
        template_path: a folder containing files to instantiate the Algo. Besides the `template_path`,
            this function will also attempt to use the `template_path` saved in the file and a directory
            named `algorithm_templates` in the parent folder of the folder containing the file.
        kwargs: additional keyword arguments (currently unused).

    Returns:
        algo: the Algo object saved in the file.
        algo_meta_data: additional keyword saved in the file, for example, acc/best_metrics.

    Raises:
        ValueError: if the file content is not a dict, ``_target_`` is missing or not a string,
            ``_state_`` is not a dict, the target cannot be imported from any candidate path
            (the underlying ``ModuleNotFoundError`` is chained as the cause), or the
            instantiated object does not look like an Algo.
        RuntimeError: if ``filename`` is a legacy ``.pkl`` file and pickle loading is disabled
            (raised by the legacy loader).
    """
    # Legacy pickle files are delegated to the gated legacy loader.
    if filename.endswith(".pkl"):
        warnings.warn(
            "Loading from pickle format (.pkl) is deprecated and will be removed in a future release. "
            "Please re-save your algo using algo_to_json() to convert to the new JSON format.",
            FutureWarning,
            stacklevel=2,
        )
        return _load_legacy_pickle(filename, template_path)

    file_dir = os.path.dirname(os.path.abspath(filename))

    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, dict):
        raise ValueError(f"The data object type is {type(data)}, type dict is expected.")

    file_template_path = data.pop("template_path", None)

    if "_target_" not in data:
        raise ValueError(f"Invalid file format: missing '_target_' key in {filename}.")

    target = data.pop("_target_")
    state = data.pop("_state_", {})

    # Validate before touching sys.path or importing anything: the file is external input.
    if not isinstance(target, str):
        raise ValueError(f"Invalid file format: '_target_' in {filename} must be a string, got {type(target)}.")
    if not isinstance(state, dict):
        raise ValueError(f"Invalid file format: '_state_' in {filename} must be a dict, got {type(state)}.")

    # Build template path candidates for sys.path setup
    template_paths: list[str] = []
    _add_path_with_parent(template_paths, str(template_path) if template_path else None)
    # Handle string "None" from corrupted files
    if file_template_path and file_template_path != "None":
        _add_path_with_parent(template_paths, file_template_path)
    fuzzy_path = os.path.join(file_dir, "..", "algorithm_templates")
    if os.path.isdir(fuzzy_path):
        template_paths.append(os.path.abspath(fuzzy_path))

    # The instantiation config does not depend on the candidate path, so build it once.
    algo_config: dict[str, Any] = {"_target_": target}
    state_template_path = state.get("template_path")
    if state_template_path:
        algo_config["template_path"] = state_template_path

    # Try each template path to instantiate the class; with no candidates, try once
    # without modifying sys.path.
    paths_to_try: list[str | None] = list(template_paths) if template_paths else [None]
    algo = None
    used_template_path: str | None = None
    last_error: ModuleNotFoundError | None = None
    for path in paths_to_try:
        path_added = False
        try:
            if path and path not in sys.path:
                sys.path.insert(0, path)
                path_added = True

            algo = ConfigParser(algo_config).get_parsed_content()
            used_template_path = path
            break
        except ModuleNotFoundError as e:
            last_error = e
            logging.debug(f"Failed to instantiate {target} with path {path}: {e}")
        finally:
            # Only undo our own insertion, so sys.path entries that were already present stay.
            if path_added and path in sys.path:
                sys.path.remove(path)

    if algo is None:
        raise ValueError(
            f"Failed to instantiate Algo from target '{target}' with paths {template_paths}"
        ) from last_error

    # '_target_' comes from the file and may name any importable callable; refuse objects
    # that do not expose the Algo methods used below instead of failing with AttributeError.
    if not callable(getattr(algo, "load_state_dict", None)) or not callable(getattr(algo, "get_output_path", None)):
        raise ValueError(f"Target '{target}' in {filename} did not produce an Algo-like object (got {type(algo)}).")

    # Restore the state
    algo.load_state_dict(state)

    # Use the path that successfully imported the class, not the original saved path
    # (the original path may no longer exist if the workdir was moved)
    algo.template_path = used_template_path

    if file_dir != os.path.abspath(algo.get_output_path()):
        logging.debug(f"{algo.get_output_path()} is changed. Now override the Algo output_path with: {file_dir}.")
        algo.output_path = file_dir

    return algo, dict(data)
+ """ + _require_pickle_allowed() + data = {"algo_bytes": pickle.dumps(algo), "template_path": str(template_path)} + pkl_filename = os.path.join(algo.get_output_path(), "algo_object.pkl") + for k, v in algo_meta_data.items(): + data.update({k: v}) + data_bytes = pickle.dumps(data) + with open(pkl_filename, "wb") as f_pi: + f_pi.write(data_bytes) + return pkl_filename + + +@deprecated(since="1.6", msg_suffix="Use algo_from_json instead. Pickle is unsafe; see MONAI_ALLOW_PICKLE.") +def algo_from_pickle(pkl_filename: str, template_path: PathLike | None = None, **kwargs: Any) -> Any: + """Import the Algo object from a pickle file. **Unsafe**; prefer ``algo_from_json``. + + Disabled unless ``MONAI_ALLOW_PICKLE=1`` is set. See ``_load_legacy_pickle`` for + template-path resolution details. + """ + return _load_legacy_pickle(pkl_filename, template_path) diff --git a/monai/utils/misc.py b/monai/utils/misc.py index 30ceec3ee5..ed48d4b37d 100644 --- a/monai/utils/misc.py +++ b/monai/utils/misc.py @@ -565,6 +565,15 @@ def extra_test_data() -> str | None: def testing_algo_template() -> str | None: return os.environ.get("MONAI_TESTING_ALGO_TEMPLATE", None) + @staticmethod + def allow_pickle() -> bool: + """If true, Auto3DSeg algo (de)serialization may use pickle. Default False. + + Pickle can execute arbitrary code on load and should only be enabled for files + from trusted sources. Prefer ``algo_to_json`` / ``algo_from_json``. + """ + return str2bool(os.environ.get("MONAI_ALLOW_PICKLE", "0")) + class ImageMetaKey: """ diff --git a/tests/auto3dseg/__init__.py b/tests/auto3dseg/__init__.py new file mode 100644 index 0000000000..1e97f89407 --- /dev/null +++ b/tests/auto3dseg/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/auto3dseg/test_json_serialization.py b/tests/auto3dseg/test_json_serialization.py new file mode 100644 index 0000000000..95177009da --- /dev/null +++ b/tests/auto3dseg/test_json_serialization.py @@ -0,0 +1,119 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import os +import pickle +import tempfile +import unittest +from pathlib import Path +from unittest import mock + +import numpy as np +import torch + +from monai.auto3dseg.utils import ( + _add_path_with_parent, + _make_json_serializable, + algo_from_json, + algo_from_pickle, + algo_to_pickle, +) + + +class TestMakeJsonSerializable(unittest.TestCase): + def test_primitives(self) -> None: + assert _make_json_serializable(None) is None + assert _make_json_serializable("hello") == "hello" + assert _make_json_serializable(42) == 42 + assert _make_json_serializable(3.14) == 3.14 + assert _make_json_serializable(True) is True + + def test_collections(self) -> None: + assert _make_json_serializable([1, 2, 3]) == [1, 2, 3] + assert _make_json_serializable((1, 2)) == [1, 2] + assert _make_json_serializable({"a": 1}) == {"a": 1} + + def test_numpy(self) -> None: + arr = np.array([1, 2, 3]) + assert _make_json_serializable(arr) == [1, 2, 3] + assert _make_json_serializable(np.int64(5)) == 5 + assert _make_json_serializable(np.float32(2.5)) == 2.5 + + def test_torch_tensor(self) -> None: + t = torch.tensor([1.0, 2.0]) + result = _make_json_serializable(t) + assert result == [1.0, 2.0] + + def test_path(self) -> None: + assert _make_json_serializable(Path("/some/path")) == os.fspath(Path("/some/path")) + + def test_fallback(self) -> None: + class Custom: + def __str__(self) -> str: + return "custom" + + assert _make_json_serializable(Custom()) == "custom" + + +class TestAddPathWithParent(unittest.TestCase): + def test_valid_directory(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + paths: list[str] = [] + _add_path_with_parent(paths, tmpdir) + assert len(paths) == 2 + assert os.path.abspath(tmpdir) in paths + assert os.path.abspath(os.path.join(tmpdir, "..")) in paths + + def test_none_path(self) -> None: + paths: list[str] = [] + _add_path_with_parent(paths, None) + assert len(paths) == 0 + + def 
test_nonexistent_path(self) -> None: + paths: list[str] = [] + _add_path_with_parent(paths, "/nonexistent/path/12345") + assert len(paths) == 0 + + +class TestPickleGate(unittest.TestCase): + """Pickle (de)serialization is gated behind MONAI_ALLOW_PICKLE=1.""" + + def setUp(self) -> None: + patcher = mock.patch.dict(os.environ, {}, clear=False) + patcher.start() + os.environ.pop("MONAI_ALLOW_PICKLE", None) + self.addCleanup(patcher.stop) + + def test_algo_to_pickle_disabled_by_default(self) -> None: + with self.assertRaisesRegex(RuntimeError, "MONAI_ALLOW_PICKLE"): + algo_to_pickle(object()) # type: ignore[arg-type] + + def test_algo_from_pickle_disabled_by_default(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + pkl = os.path.join(tmpdir, "algo_object.pkl") + with open(pkl, "wb") as f: + f.write(b"not used") + with self.assertRaisesRegex(RuntimeError, "MONAI_ALLOW_PICKLE"): + algo_from_pickle(pkl) + + def test_algo_from_json_legacy_pkl_disabled_by_default(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + pkl = os.path.join(tmpdir, "algo_object.pkl") + with open(pkl, "wb") as f: + pickle.dump({"algo_bytes": b"x", "template_path": None}, f) + with self.assertRaisesRegex(RuntimeError, "MONAI_ALLOW_PICKLE"): + algo_from_json(pkl) + + +if __name__ == "__main__": + unittest.main()