Merge pull request #705 from MannLabs/improve_reuse_quant

mschwoer · web-flow · commit f44507a67ac5 · 2025-10-27T09:56:13.000+01:00
Improve reuse quant
diff --git a/alphadia/cli.py b/alphadia/cli.py
@@ -159,7 +159,7 @@ def _get_config_from_args(
         try:
             _recursive_update(config, json.loads(args.config_dict))
         except Exception as e:
-            print(f"Could not parse config update: {e}")
+            raise ValueError(f"Could not parse config dict: {e}") from e
 
     return config, args.config, args.config_dict
 
diff --git a/alphadia/search_step.py b/alphadia/search_step.py
@@ -107,7 +107,7 @@ def _save_config(self, output_folder: str) -> None:
         file_path = os.path.join(output_folder, "frozen_config.yaml")
         moved_path = move_existing_file(file_path)
         self._config.to_yaml(file_path)
-        if moved_path:
+        if moved_path != file_path:
             logging.info(f"Moved existing config file {file_path} to {moved_path}")
 
     @staticmethod
@@ -434,12 +434,13 @@ def run(
                         for file_name in required_files
                     ):
                         logger.info(
-                            f"reuse_quant: found existing quantification for {raw_name}, skipping processing .."
+                            f"general.reuse_quant: found existing quantification for {raw_name}, skipping processing .."
                         )
                         is_already_processed = True
-                    logger.info(
-                        f"reuse_quant: found no existing quantification for {raw_name}, proceeding with processing .."
-                    )
+                    else:
+                        logger.warning(
+                            f"general.reuse_quant: found no existing quantification for {raw_name}, proceeding with processing .."
+                        )
 
                 if not is_already_processed:
                     self._process_raw_file(workflow, dia_path, speclib)
diff --git a/docs/guides/libfree-gui.md b/docs/guides/libfree-gui.md
@@ -9,7 +9,7 @@ Also ensure the right execution engine has been selected and your version is up
 <img src="../_static/images/libfree-gui-v1.10.1/initial_engine.png" width="100%" height="auto">
 
 ## 2. Project Structure
-In this workflow, we will perform a DIA search that handles both library generation and cross-sample quantification (Match Between Runs) automatically. We will not use transfer learning in this workflow as we are looking for unmodified peptides from isntruments well supported by the default PeptDeep model.
+In this workflow, we will perform a DIA search that handles both library generation and cross-sample quantification (Match Between Runs) automatically. We will not use transfer learning in this workflow as we are looking for unmodified peptides from instruments well supported by the default PeptDeep model.
 
 Start by preparing a single output folder for your analysis results.
 
diff --git a/docs/methods/command-line.md b/docs/methods/command-line.md
@@ -121,3 +121,34 @@ alphadia \
     --config config_astral_first_pass.yaml \
     --config-dict "{\"library_prediction\":{\"nce\":26}}"
 ```
+
+## Advanced
+
+### Restarting
+During the main search, alphaDIA processes each raw file independently.
+After each file, quantification results are saved to `<output_folder>/quant/<raw_file_name>`,
+which can be used as a checkpoint in case the processing is interrupted.
+
+The config switch `general.reuse_quant` enables skipping raw file processing
+when quantification results already exist, which is useful for
+distributed searches or for re-running the consensus step (protein inference, FDR and LFQ) with different parameters.
+
+When enabled, alphaDIA checks before processing each raw file whether quantification results already exist.
+If so, it skips processing entirely and reuses the existing quantification.
+If not, the file is searched as usual.
+After all quantifications are available, the workflow continues normally, combining results from all files.
+This way, an alphaDIA run that failed at file 9/10 (e.g. due to a cluster timeout) can simply be restarted,
+as only the missing files (9 and 10) will be processed.
+
+The `--quant-dir` CLI parameter (Config: `quant_directory`, default: null)
+can be used to specify the directory containing quantification results.
+
+On startup, the current configuration is dumped as `frozen_config.yaml`, which contains all information to reproduce this run.
+
+Combining these three concepts, here's an example of how to reuse an existing quantification (from the `previous_run` directory) while creating additional
+output (`peptide_level_lfq`):
+```
+alphadia -o ./output_dir --quant-dir ./previous_run/quant --config ./previous_run/frozen_config.yaml --config-dict '{"general": {"reuse_quant": "True"}, "search_output": {"peptide_level_lfq": "True"}}'
+```
+
+Cf. also the documentation on [distributed search](./dist_search_setup.md).
diff --git a/tests/unit_tests/test_cli.py b/tests/unit_tests/test_cli.py
@@ -104,7 +104,16 @@ def test_cli_unknown_args(
 @patch("alphadia.cli.parser.parse_known_args")
 def test_cli_minimal_args(mock_parse_known_args):
     """Test the run function of the CLI with minimal arguments maps correctly to SearchPlan."""
-    mock_args = MagicMock(config=None, version=None, check=None, output="/output")
+    mock_args = MagicMock(
+        config=None,
+        version=None,
+        check=None,
+        output="/output",
+        config_dict="{}",
+        file=[],
+        directory=[],
+        regex=".*",
+    )
     mock_parse_known_args.return_value = (mock_args, [])
 
     mock_search_plan = MagicMock()
@@ -147,6 +156,10 @@ def test_cli_minimal_args_all_none(mock_parse_known_args):
         fasta=None,
         library=None,
         quant_dir=None,
+        config_dict="{}",
+        file=[],
+        directory=[],
+        regex=".*",
     )
     mock_parse_known_args.return_value = (mock_args, [])
 
diff --git a/tests/unit_tests/workflow/test_config.py b/tests/unit_tests/workflow/test_config.py
@@ -90,9 +90,6 @@ def test_config_update_simple_two_files():
     # when
     config_1.update([config_2, config_3], do_print=True)
 
-    config_1.__repr__()
-    print("X")
-
     assert config_1 == expected_generic_default_config_dict | {
         "simple_value_int": 2,
         "simple_value_float": 5.0,