nextgenusfs · nextgenusfs · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/funannotate2/align.py b/funannotate2/align.py
@@ -1,5 +1,6 @@
 import os
 import sys
+import tempfile
 import uuid
 
 from gapmm2.align import aligner as transcript_aligner
@@ -66,7 +67,7 @@ def align_transcripts(
     cpus=1,
     max_intron=3000,
     log=sys.stderr.write,
-    tmpdir="/tmp",
+    tmpdir=None,
 ):
     """
     Run spliced transcript alignment using gapmm2 (mm2 + edlib refinement).
@@ -82,7 +83,7 @@ def align_transcripts(
         cpus (int, optional): Number of CPUs to use for alignment. Defaults to 1.
         max_intron (int, optional): Maximum intron length allowed. Defaults to 3000.
         log (function, optional): Logging function. Defaults to sys.stderr.write.
-        tmpdir (str, optional): Temporary directory path. Defaults to "/tmp".
+        tmpdir (str, optional): Accepted for API parity with ``align_proteins``; currently unused because the gapmm2 backend writes directly to ``output=evidence``. Defaults to ``None`` (resolves to ``tempfile.gettempdir()`` when relied upon).
     """
     # function to run spliced transcript alignment using gapmm2 (mm2 + edlib refinement)
     # generate an EVM compatible GFF alignment file, parse data and pull out any full length gene models
@@ -114,7 +115,7 @@ def align_proteins(
     cpus=1,
     max_intron=3000,
     log=sys.stderr.write,
-    tmpdir="/tmp",
+    tmpdir=None,
 ):
     """
     Align protein evidence to the genome assembly using miniprot.
@@ -129,13 +130,15 @@ def align_proteins(
         cpus (int, optional): Number of CPUs to use for alignment. Defaults to 1.
         max_intron (int, optional): Maximum intron length for alignment. Defaults to 3000.
         log (function, optional): Logger function for info and error messages. Defaults to sys.stderr.write.
-        tmpdir (str, optional): Temporary directory path for storing intermediate files. Defaults to "/tmp".
+        tmpdir (str, optional): Temporary directory path for storing intermediate files. Defaults to the system temporary directory (``tempfile.gettempdir()``), which honors ``$TMPDIR``.
 
     Returns:
         None
     """
     # function to align protein evidence to genome with miniprot
     # generate evidence alignments and then parse any full length models
+    if tmpdir is None:
+        tmpdir = tempfile.gettempdir()
     mini_tmp = os.path.join(tmpdir, f"{uuid.uuid1()}.miniprot.out")
     # miniprot --outn=4 --gff-only -t 8 valid_contigs.fasta /usr/local/share/funannotate/uniprot_sprot.fasta > uniprot.mini.gff3
     log.info("Aligning protein evidence to the genome assembly with miniprot")

diff --git a/funannotate2/annotate.py b/funannotate2/annotate.py
@@ -310,7 +310,7 @@ def annotate(args):
             "Annotating proteome with diamond against the UniProtKB/Swiss-Prot database"
         )
         start = time.time()
-        swiss = swissprot_blast(Proteins, cpus=args.cpus)
+        swiss = swissprot_blast(Proteins, cpus=args.cpus, tmpdir=tmp_dir)
         end = time.time()
         logger.info(
             f"UniProtKB/Swiss-Prot search resulted in {len(swiss)} hits and finished in {round(end - start, 2)} seconds"
@@ -330,7 +330,7 @@ def annotate(args):
             "Annotating proteome with diamond against the MEROPS protease database"
         )
         start = time.time()
-        merops = merops_blast(Proteins, cpus=args.cpus)
+        merops = merops_blast(Proteins, cpus=args.cpus, tmpdir=tmp_dir)
         end = time.time()
         logger.info(
             f"MEROPS search resulted in {len(merops)} hits and finished in {round(end - start, 2)} seconds"

diff --git a/funannotate2/predict.py b/funannotate2/predict.py
@@ -271,6 +271,7 @@ def predict(args):
             cpus=args.cpus,
             max_intron=args.max_intron,
             log=logger,
+            tmpdir=tmp_dir,
         )
     elif checkfile(TranAlign) and checkfile(TranGenes):
         log("Existing transcript alignments found, will re-use and continue")
@@ -304,6 +305,7 @@ def predict(args):
             cpus=args.cpus,
             max_intron=args.max_intron,
             log=logger,
+            tmpdir=tmp_dir,
         )
     elif checkfile(ProtAlign) and checkfile(ProtGenes):
         log("Existing protein alignments found, will re-use and continue")

diff --git a/funannotate2/search.py b/funannotate2/search.py
@@ -3,6 +3,7 @@
 import re
 import subprocess
 import sys
+import tempfile
 import uuid
 
 import json_repair
@@ -316,7 +317,7 @@ def dbcan2tsv(results, output, annots):
     return a
 
 
-def diamond_blast(db, query, cpus=1, evalue=10.0, max_target_seqs=1, tmpdir="/tmp"):
+def diamond_blast(db, query, cpus=1, evalue=10.0, max_target_seqs=1, tmpdir=None):
     """
     Run a protein BLAST using Diamond.
 
@@ -328,12 +329,14 @@ def diamond_blast(db, query, cpus=1, evalue=10.0, max_target_seqs=1, tmpdir="/tm
         cpus (int, optional): Number of CPUs to use. Defaults to 1.
         evalue (float, optional): E-value threshold. Defaults to 10.0.
         max_target_seqs (int, optional): Maximum number of target sequences to report. Defaults to 1.
-        tmpdir (str, optional): Temporary directory to store intermediate files. Defaults to "/tmp".
+        tmpdir (str, optional): Temporary directory to store intermediate files. Defaults to the system temporary directory (``tempfile.gettempdir()``), which honors ``$TMPDIR``.
 
     Returns:
         list: JSON formatted results of the BLAST search.
     """
     # use diamond as protein blast
+    if tmpdir is None:
+        tmpdir = tempfile.gettempdir()
     tmpfile = os.path.join(tmpdir, f"diamond_{uuid.uuid4()}")
     cmd = [
         "diamond",
@@ -377,7 +380,7 @@ def diamond_blast(db, query, cpus=1, evalue=10.0, max_target_seqs=1, tmpdir="/tm
     return results
 
 
-def merops_blast(query, evalue=1e-5, cpus=1, max_target_seqs=1):
+def merops_blast(query, evalue=1e-5, cpus=1, max_target_seqs=1, tmpdir=None):
     """
     Run a BLAST search against the MEROPS database using Diamond.
 
@@ -388,6 +391,7 @@ def merops_blast(query, evalue=1e-5, cpus=1, max_target_seqs=1):
         evalue (float, optional): E-value threshold for reporting hits. Defaults to 1e-5.
         cpus (int, optional): Number of CPUs to use. Defaults to 1.
         max_target_seqs (int, optional): Maximum number of target sequences to report. Defaults to 1.
+        tmpdir (str, optional): Temporary directory forwarded to ``diamond_blast`` for intermediate files. Defaults to the system temporary directory.
 
     Returns:
         list: A list of dictionaries containing information about the hits found, including coverage and family details.
@@ -397,7 +401,12 @@ def merops_blast(query, evalue=1e-5, cpus=1, max_target_seqs=1):
     if checkfile(merops_db):
         results = []
         raw_results = diamond_blast(
-            merops_db, query, cpus=cpus, evalue=evalue, max_target_seqs=max_target_seqs
+            merops_db,
+            query,
+            cpus=cpus,
+            evalue=evalue,
+            max_target_seqs=max_target_seqs,
+            tmpdir=tmpdir,
         )
         for res in raw_results:
             coverage = (res["length"] / res["qlen"]) * 100
@@ -440,7 +449,13 @@ def merops2tsv(results, output, annots):
 
 
 def swissprot_blast(
-    query, evalue=1e-5, cpus=1, min_pident=60, min_cov=60, max_target_seqs=1
+    query,
+    evalue=1e-5,
+    cpus=1,
+    min_pident=60,
+    min_cov=60,
+    max_target_seqs=1,
+    tmpdir=None,
 ):
     """
     Perform a BLAST search against the SwissProt database using Diamond.
@@ -454,6 +469,7 @@ def swissprot_blast(
         min_pident (int, optional): Minimum percentage identity required for a hit. Defaults to 60.
         min_cov (int, optional): Minimum coverage percentage required for a hit. Defaults to 60.
         max_target_seqs (int, optional): Maximum number of target sequences to report. Defaults to 1.
+        tmpdir (str, optional): Temporary directory forwarded to ``diamond_blast`` for intermediate files. Defaults to the system temporary directory.
 
     Returns:
         list: A list of dictionaries containing information about the filtered hits, including coverage, percentage identity, and alignment details.
@@ -463,7 +479,12 @@ def swissprot_blast(
     if checkfile(uniprot_db):
         results = []
         raw_results = diamond_blast(
-            uniprot_db, query, cpus=cpus, evalue=evalue, max_target_seqs=max_target_seqs
+            uniprot_db,
+            query,
+            cpus=cpus,
+            evalue=evalue,
+            max_target_seqs=max_target_seqs,
+            tmpdir=tmpdir,
         )
         for res in raw_results:
             if res["pident"] >= min_pident:

diff --git a/funannotate2/utilities.py b/funannotate2/utilities.py
@@ -9,6 +9,7 @@
 import socket
 import subprocess
 import sys
+import tempfile
 import textwrap
 import time
 import uuid
@@ -531,32 +532,27 @@ def human_readable_size(size, decimal_places=2):
 
 def create_tmpdir(outdir, base="predict"):
     """
-    Create a temporary directory for storing files.
+    Create a unique temporary directory for storing intermediate files.
 
-    This function generates a temporary directory with a unique name based on the provided
-    base name and a UUID. If an output directory is specified, the temporary directory is
-    created within it. If the output directory is "/tmp", a subdirectory is created; otherwise,
-    the specified directory is used directly. If no output directory is provided, the temporary
-    directory is created in the current working directory.
+    A ``<base>_<uuid>`` subdirectory is always created inside the supplied
+    volume (``outdir``). If ``outdir`` is falsy, the system temporary directory
+    (``tempfile.gettempdir()``, which honors the ``$TMPDIR`` environment
+    variable) is used as the parent. Using a unique subdirectory avoids
+    collisions between concurrent runs sharing a scratch volume and prevents
+    downstream cleanup from inadvertently removing a user-supplied directory.
 
     Parameters:
-    - outdir (str): The output directory path.
+    - outdir (str or None): The parent directory under which the unique tmpdir
+      will be created. If falsy, ``tempfile.gettempdir()`` is used.
     - base (str, optional): The base name for the temporary directory (default is "predict").
 
     Returns:
     - str: The absolute path of the created temporary directory.
     """
-    # create a tmpdir for some files
     tmpdirslug = "{}_{}".format(base, str(uuid.uuid4()))
-    if outdir:
-        if outdir == "/tmp":
-            tmpdir = os.path.join(outdir, tmpdirslug)
-        else:
-            tmpdir = outdir
-    else:
-        tmpdir = tmpdirslug
-    if not os.path.isdir(tmpdir):
-        os.makedirs(tmpdir)
+    parent = outdir if outdir else tempfile.gettempdir()
+    tmpdir = os.path.join(parent, tmpdirslug)
+    os.makedirs(tmpdir, exist_ok=True)
     return os.path.abspath(tmpdir)
 
 

diff --git a/tests/unit/test_predict.py b/tests/unit/test_predict.py
@@ -317,3 +317,73 @@ def fake_run_snap(*args, **kwargs):
         assert not any("memory prediction for scaffold_1.fasta" in message for message in debug_messages)
         assert stats["tools_run"] == ["snap"]
         assert stats["contig_length"] == 4
+
+
+
+class TestAlignProteinsTmpdir:
+    """Tests verifying align_proteins honors the tmpdir kwarg (issue #52 follow-up)."""
+
+    def _capture_stdout_path(self):
+        captured = {}
+
+        def fake_run_subprocess(cmd, log, stdout=None, **kwargs):
+            captured["stdout"] = stdout
+            captured["cmd"] = cmd
+            return 0
+
+        return captured, fake_run_subprocess
+
+    @patch("funannotate2.align.os.remove")
+    @patch("funannotate2.align.split_evidence_and_genes")
+    @patch("funannotate2.align.runSubprocess")
+    def test_align_proteins_uses_provided_tmpdir(
+        self, mock_run_subprocess, mock_split, mock_remove, tmp_path
+    ):
+        from funannotate2.align import align_proteins
+
+        captured, fake = self._capture_stdout_path()
+        mock_run_subprocess.side_effect = fake
+        mock_split.return_value = (0, 0)
+
+        logger = MagicMock()
+        align_proteins(
+            "genome.fasta",
+            "proteins.fasta",
+            "evidence.gff3",
+            "genes.gff3",
+            cpus=1,
+            log=logger,
+            tmpdir=str(tmp_path),
+        )
+
+        assert captured["stdout"] is not None
+        assert captured["stdout"].startswith(str(tmp_path) + os.sep)
+        assert captured["stdout"].endswith(".miniprot.out")
+
+    @patch("funannotate2.align.os.remove")
+    @patch("funannotate2.align.split_evidence_and_genes")
+    @patch("funannotate2.align.runSubprocess")
+    def test_align_proteins_defaults_to_system_tmp(
+        self, mock_run_subprocess, mock_split, mock_remove
+    ):
+        import tempfile
+
+        from funannotate2.align import align_proteins
+
+        captured, fake = self._capture_stdout_path()
+        mock_run_subprocess.side_effect = fake
+        mock_split.return_value = (0, 0)
+
+        logger = MagicMock()
+        align_proteins(
+            "genome.fasta",
+            "proteins.fasta",
+            "evidence.gff3",
+            "genes.gff3",
+            cpus=1,
+            log=logger,
+        )
+
+        assert captured["stdout"] is not None
+        assert captured["stdout"].startswith(tempfile.gettempdir() + os.sep)
+        assert captured["stdout"].endswith(".miniprot.out")