From f5a2972e548da4e035739c6975d6af631953fd6b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Tue, 7 Apr 2026 15:25:45 +0200 Subject: [PATCH 01/25] Initial commit fgumi implementation --- assets/schema_input.json | 2 +- assets/schema_sampleinfo.json | 2 +- conf/modules.config | 118 ++++++++++ main.nf | 27 +++ modules/local/fgumi/duplexmetrics/main.nf | 35 +++ modules/local/fgumi/extract/main.nf | 40 ++++ modules/local/fgumi/filter/main.nf | 48 ++++ modules/local/fgumi/group/main.nf | 39 ++++ modules/local/fgumi/simplex/main.nf | 37 ++++ modules/local/fgumi/snapzippersort/main.nf | 66 ++++++ nextflow.config | 16 ++ nextflow_schema.json | 77 +++++++ .../local/fastq_to_aligned_cram/main.nf | 79 ++++++- tests/inputs/fgumi/R1.fastq.gz | 4 + tests/inputs/fgumi/R2.fastq.gz | 4 + tests/inputs/fgumi/consensus.bam | 1 + tests/inputs/fgumi/grouped.bam | 1 + tests/inputs/fgumi/ref.dict | 2 + tests/inputs/fgumi/ref.fa | 2 + tests/inputs/fgumi/ref.fa.fai | 1 + .../inputs/fgumi/snap_index/OverflowTable.txt | 1 + tests/inputs/fgumi/template.bam | 1 + tests/inputs/fgumi/unmapped.bam | 1 + tests/inputs/test.yml | 15 ++ .../local/fgumi/duplexmetrics/main.nf.test | 33 +++ .../fgumi/duplexmetrics/main.nf.test.snap | 43 ++++ .../modules/local/fgumi/extract/main.nf.test | 35 +++ .../local/fgumi/extract/main.nf.test.snap | 47 ++++ tests/modules/local/fgumi/filter/main.nf.test | 33 +++ .../local/fgumi/filter/main.nf.test.snap | 75 +++++++ tests/modules/local/fgumi/group/main.nf.test | 32 +++ .../local/fgumi/group/main.nf.test.snap | 75 +++++++ .../modules/local/fgumi/simplex/main.nf.test | 32 +++ .../local/fgumi/simplex/main.nf.test.snap | 59 +++++ .../local/fgumi/snapzippersort/main.nf.test | 35 +++ .../fgumi/snapzippersort/main.nf.test.snap | 45 ++++ .../fgumi_umi_stub.nf.test | 58 +++++ .../fgumi_umi_stub.nf.test.snap | 205 ++++++++++++++++++ .../local/fastq_to_aligned_cram/main.nf.test | 46 ++++ workflows/preprocessing.nf | 8 + 40 files changed, 1475 insertions(+), 5 deletions(-) 
create mode 100644 modules/local/fgumi/duplexmetrics/main.nf create mode 100644 modules/local/fgumi/extract/main.nf create mode 100644 modules/local/fgumi/filter/main.nf create mode 100644 modules/local/fgumi/group/main.nf create mode 100644 modules/local/fgumi/simplex/main.nf create mode 100644 modules/local/fgumi/snapzippersort/main.nf create mode 100644 tests/inputs/fgumi/R1.fastq.gz create mode 100644 tests/inputs/fgumi/R2.fastq.gz create mode 100644 tests/inputs/fgumi/consensus.bam create mode 100644 tests/inputs/fgumi/grouped.bam create mode 100644 tests/inputs/fgumi/ref.dict create mode 100644 tests/inputs/fgumi/ref.fa create mode 100644 tests/inputs/fgumi/ref.fa.fai create mode 100644 tests/inputs/fgumi/snap_index/OverflowTable.txt create mode 100644 tests/inputs/fgumi/template.bam create mode 100644 tests/inputs/fgumi/unmapped.bam create mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test create mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/extract/main.nf.test create mode 100644 tests/modules/local/fgumi/extract/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/filter/main.nf.test create mode 100644 tests/modules/local/fgumi/filter/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/group/main.nf.test create mode 100644 tests/modules/local/fgumi/group/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/simplex/main.nf.test create mode 100644 tests/modules/local/fgumi/simplex/main.nf.test.snap create mode 100644 tests/modules/local/fgumi/snapzippersort/main.nf.test create mode 100644 tests/modules/local/fgumi/snapzippersort/main.nf.test.snap create mode 100644 tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test create mode 100644 tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap diff --git a/assets/schema_input.json b/assets/schema_input.json index 4a7bd527..6c3931a2 100644 --- 
a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -50,7 +50,7 @@
             "umi_aware": {
                 "meta": ["umi_aware"],
                 "type": "boolean",
-                "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.",
+                "description": "Enable UMI-aware processing through the fgumi consensus branch.",
                 "default": false
             },
             "skip_trimming": {
diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json
index dee4c572..15571ba1 100644
--- a/assets/schema_sampleinfo.json
+++ b/assets/schema_sampleinfo.json
@@ -90,7 +90,7 @@
             "umi_aware": {
                 "meta": ["umi_aware"],
                 "type": "boolean",
-                "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.",
+                "description": "Enable UMI-aware processing through the fgumi consensus branch.",
                 "default": false
             },
             "skip_trimming": {
diff --git a/conf/modules.config b/conf/modules.config
index becf78e8..af3612d1 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -231,6 +231,113 @@ process {
         }
     }
 
+    //// FGUMI extract (step 1)
+    withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' {
+        cpus = 8
+        memory = 32.GB
+        ext.prefix = { "${meta.id}.fgumi.unmapped" }
+        ext.args = {
+            [
+                "--read-structures ${params.fgumi_read_structures}",
+                params.fgumi_extract_umis_from_read_names ? "--extract-umis-from-read-names" : "",
+                "--threads ${task.cpus}",
+                "--queue-memory ${params.fgumi_queue_memory}",
+                params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "",
+                "--compression-level ${params.fgumi_compression_level}",
+            ].join(" ").trim()
+        }
+    }
+
+    //// FGUMI fastq | SNAP | zipper | template sort (step 3)
+    // ext.args -> snap-aligner, ext.args2 -> fgumi zipper, ext.args3 -> fgumi sort; the module reads no other ext.args*.
+    withName: '.*FASTQ_TO_CRAM:FGUMI_SNAP_ZIPPER_SORT' {
+        cpus = 16
+        memory = 64.GB
+        ext.prefix = { "${meta.id}.fgumi" }
+        ext.args = {
+            [
+                params.fgumi_snap_ignore_mismatched_pairs ? "-I" : "",
+                "${params.fgumi_snap_extra_args}",
+            ].join(" ").trim()
+        }
+        ext.args2 = {
+            [
+                "--threads ${task.cpus}",
+            ].join(" ").trim()
+        }
+        ext.args3 = {
+            [
+                "--threads ${task.cpus}",
+                "--max-memory ${params.fgumi_sort_max_memory}",
+                "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}",
+            ].join(" ").trim()
+        }
+    }
+
+    //// FGUMI group (step 4)
+    withName: '.*FASTQ_TO_CRAM:FGUMI_GROUP' {
+        cpus = 8
+        memory = 32.GB
+        ext.prefix = { "${meta.id}.fgumi.group" }
+        ext.args = {
+            [
+                "--strategy ${params.fgumi_group_strategy}",
+                "--edits ${params.fgumi_group_edits}",
+                "--threads ${task.cpus}",
+                "--queue-memory ${params.fgumi_queue_memory}",
+                params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "",
+                "--compression-level ${params.fgumi_compression_level}",
+                "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt",
+                "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt",
+            ].join(" ").trim()
+        }
+    }
+
+    //// FGUMI simplex (step 5)
+    withName: '.*FASTQ_TO_CRAM:FGUMI_SIMPLEX' {
+        cpus = 8
+        memory = 32.GB
+        ext.prefix = { "${meta.id}.fgumi.simplex" }
+        ext.args = {
+            [
+                "--min-reads ${params.fgumi_simplex_min_reads}",
+                "--threads ${task.cpus}",
+                "--queue-memory ${params.fgumi_queue_memory}",
+                params.fgumi_queue_memory_per_thread ? 
"--queue-memory-per-thread" : "", + "--compression-level ${params.fgumi_compression_level}", + "--stats ${meta.id}.fgumi.simplex.consensus_metrics.txt", + ].join(" ").trim() + } + } + + //// FGUMI duplex-metrics (step 6) + withName: '.*FASTQ_TO_CRAM:FGUMI_DUPLEX_METRICS' { + cpus = 2 + memory = 8.GB + ext.prefix = { "${meta.id}.fgumi" } + ext.args = "${params.fgumi_duplex_metrics_extra_args}" + } + + //// FGUMI filter + coordinate sort/index (step 7) + withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.filter" } + ext.args = { + [ + "--min-reads ${params.fgumi_filter_min_reads}", + "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", + ].join(" ").trim() + } + ext.args2 = { + [ + "--threads ${task.cpus}", + "--max-memory ${params.fgumi_sort_max_memory}", + "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}", + ].join(" ").trim() + } + } + // coverage //// Mosdepth withName: '.*COVERAGE:MOSDEPTH' { diff --git a/main.nf b/main.nf index 64e8d3db..e0c1f1fa 100644 --- a/main.nf +++ b/main.nf @@ -79,6 +79,11 @@ workflow { rna_junctions = PREPROCESSING.out.rna_junctions align_reports = PREPROCESSING.out.align_reports sormadup_metrics = PREPROCESSING.out.sormadup_metrics + // Additional UMI consensus outputs. + family_size_histogram = PREPROCESSING.out.family_size_histogram + umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam + umi_duplex_metrics = PREPROCESSING.out.umi_duplex_metrics + umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary mosdepth_regions = PREPROCESSING.out.mosdepth_regions @@ -176,6 +181,28 @@ output { metrics >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.duplicate_metrics.txt" : "${meta.samplename}/${meta.samplename}.duplicate_metrics.txt") } } + // UMI consensus artefacts are published per sample next to CRAM outputs. 
+ family_size_histogram { + path { meta, histogram -> + histogram >> (meta.library ? "${meta.library}/${meta.samplename}/${histogram.name}" : "${meta.samplename}/${histogram.name}") + } + } + umi_filtered_consensus_bam { + path { meta, bam -> + bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") + } + } + umi_duplex_metrics { + path { meta, _file -> + return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") + } + } + umi_crams { + path { meta, cram, crai -> + cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") + crai >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram.crai" : "${meta.samplename}/${meta.samplename}.umi.cram.crai") + } + } mosdepth_global { path { meta, _file -> return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") diff --git a/modules/local/fgumi/duplexmetrics/main.nf b/modules/local/fgumi/duplexmetrics/main.nf new file mode 100644 index 00000000..14954864 --- /dev/null +++ b/modules/local/fgumi/duplexmetrics/main.nf @@ -0,0 +1,35 @@ +process FGUMI_DUPLEX_METRICS { + tag "$meta.id" + label 'process_low' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.duplex_metrics*"), emit: duplex_metrics + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + fgumi duplex-metrics \ + --input ${bam} \ + --output ${prefix}.duplex_metrics \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.duplex_metrics.txt + """ +} diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf new file mode 100644 index 00000000..bd4294fc --- /dev/null +++ b/modules/local/fgumi/extract/main.nf @@ -0,0 +1,40 @@ +process FGUMI_EXTRACT { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" + def sample_name = meta.samplename ?: meta.id + def library_name = meta.library ?: meta.id + def input_files = (reads instanceof List ? 
reads : [reads]).collect { read -> "${read}" }.join(' ')
+
+    """
+    fgumi extract \
+        --inputs ${input_files} \
+        --output ${prefix}.bam \
+        --sample "${sample_name}" \
+        --library "${library_name}" \
+        ${args}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped"
+    """
+    touch ${prefix}.bam
+    """
+}
diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf
new file mode 100644
index 00000000..c35bd7cc
--- /dev/null
+++ b/modules/local/fgumi/filter/main.nf
@@ -0,0 +1,56 @@
+// Filter consensus reads with fgumi, then coordinate-sort and index the result.
+// Emits the sorted BAM, its index and, when requested through ext.args, filtering metrics.
+process FGUMI_FILTER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data'
+        : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}"
+
+    input:
+    tuple val(meta), path(bam), path(fasta)
+
+    output:
+    tuple val(meta), path("${prefix}.bam"), emit: bam
+    tuple val(meta), path("${prefix}.bam.bai"), emit: bai
+    // Optional: the script does not request a metrics file itself, so this only exists
+    // when ext.args makes fgumi filter write one under exactly this name.
+    tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics
+    tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // ext.args -> fgumi filter, ext.args2 -> fgumi sort
+    def args = task.ext.args ?: ''
+    def sort_args = task.ext.args2 ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter"
+
+    """
+    fgumi filter \
+        --input ${bam} \
+        --output ${prefix}.filtered.bam \
+        --ref ${fasta} \
+        ${args}
+
+    fgumi sort \
+        --input ${prefix}.filtered.bam \
+        --output ${prefix}.bam \
+        --order coordinate \
+        --write-index \
+        ${sort_args}
+
+    # The unsorted intermediate is not a declared output; drop it once the sorted BAM exists.
+    rm -f ${prefix}.filtered.bam
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}.bam.bai
+    touch ${prefix}.filtering_metrics.txt
+    """
+}
diff --git a/modules/local/fgumi/group/main.nf 
b/modules/local/fgumi/group/main.nf new file mode 100644 index 00000000..6779351f --- /dev/null +++ b/modules/local/fgumi/group/main.nf @@ -0,0 +1,39 @@ +process FGUMI_GROUP { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.grouping_metrics.txt"), optional: true, emit: grouping_metrics + tuple val(meta), path("${prefix}.family_size_histogram.txt"), optional: true, emit: family_size_histogram + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" + + """ + fgumi group \ + --input ${bam} \ + --output ${prefix}.bam \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" + """ + touch ${prefix}.bam + touch ${prefix}.grouping_metrics.txt + touch ${prefix}.family_size_histogram.txt + """ +} diff --git a/modules/local/fgumi/simplex/main.nf b/modules/local/fgumi/simplex/main.nf new file mode 100644 index 00000000..81dfc29a --- /dev/null +++ b/modules/local/fgumi/simplex/main.nf @@ -0,0 +1,37 @@ +process FGUMI_SIMPLEX { + tag "$meta.id" + label 'process_medium' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.consensus_metrics.txt"), optional: true, emit: consensus_metrics + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" + + """ + fgumi simplex \ + --input ${bam} \ + --output ${prefix}.bam \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex" + """ + touch ${prefix}.bam + touch ${prefix}.consensus_metrics.txt + """ +} diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf new file mode 100644 index 00000000..0c6ec63b --- /dev/null +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -0,0 +1,66 @@ +process FGUMI_SNAP_ZIPPER_SORT { + tag "$meta.id" + label 'process_high' + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/2466270633749543330f352e34588f142de4988585ce63e7f22ee5ed1ff57450/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c9ba911435350668'}" + + input: + tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict) + + output: + tuple val(meta), path("${prefix}.template.bam"), emit: bam + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def snap_args = task.ext.args ?: '' + def zipper_args = task.ext.args2 ?: '' + def sort_args = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + + """ + INDEX=`dirname \$(find -L ./ -name "OverflowTable*" | head -n1)` + [ -z "\$INDEX" ] && echo "Snap index files not found" 1>&2 && exit 1 + + # Ensure zipper and fastq read exactly the same queryname-ordered unmapped stream. 
+ samtools sort \ + -n \ + -@ ${task.cpus} \ + -m 1G \ + -o ${prefix}.unmapped.queryname.bam \ + ${unmapped_bam} + + fgumi fastq --input ${prefix}.unmapped.queryname.bam \ + | snap-aligner paired \ + \$INDEX \ + -pairedInterleavedFastq - \ + -o -sam - \ + -t ${task.cpus} \ + ${snap_args} \ + | samtools sort \ + -n \ + -@ ${task.cpus} \ + -m 1G \ + -O SAM \ + - \ + | fgumi zipper \ + --unmapped ${prefix}.unmapped.queryname.bam \ + --reference ${fasta} \ + ${zipper_args} \ + | fgumi sort \ + --input /dev/stdin \ + --output ${prefix}.template.bam \ + --order template-coordinate \ + ${sort_args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi" + """ + touch ${prefix}.template.bam + """ +} diff --git a/nextflow.config b/nextflow.config index 61685c2d..60eecb88 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,6 +22,22 @@ params { split_fastq = 100000000 genelists = null + // UMI consensus (fgumi) options + fgumi_read_structures = '+T +T' + fgumi_extract_umis_from_read_names = true + fgumi_group_strategy = 'adjacency' + fgumi_group_edits = 1 + fgumi_simplex_min_reads = 1 + fgumi_filter_min_reads = '1,1,1' + fgumi_queue_memory = 768 + fgumi_queue_memory_per_thread= true + fgumi_compression_level = 1 + fgumi_sort_max_memory = '2G' + fgumi_sort_memory_per_thread = true + fgumi_snap_ignore_mismatched_pairs = true + fgumi_snap_extra_args = '' + fgumi_duplex_metrics_extra_args = '' + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e0d5f227..ad538cad 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -72,6 +72,83 @@ "exists": true, "format": "directory-path", "description": "Directory containing gene list bed files for granular coverage analysis" + }, + "fgumi_read_structures": { + "type": "string", + "default": "+T +T", + "description": "Read structures passed to fgumi extract for UMI extraction from FASTQ." 
+ }, + "fgumi_extract_umis_from_read_names": { + "type": "boolean", + "default": true, + "description": "Enable fgumi extract --extract-umis-from-read-names for read name encoded UMIs." + }, + "fgumi_group_strategy": { + "type": "string", + "default": "adjacency", + "description": "UMI grouping strategy for fgumi group.", + "enum": ["identity", "edit", "adjacency", "paired"] + }, + "fgumi_group_edits": { + "type": "integer", + "default": 1, + "minimum": 0, + "description": "Maximum UMI edit distance used by fgumi group." + }, + "fgumi_simplex_min_reads": { + "type": "integer", + "default": 1, + "minimum": 1, + "description": "Minimum number of reads required per UMI family for fgumi simplex consensus generation." + }, + "fgumi_filter_min_reads": { + "type": "string", + "default": "1,1,1", + "description": "Minimum reads triplet passed to fgumi filter --min-reads." + }, + "fgumi_queue_memory": { + "type": "integer", + "default": 768, + "minimum": 64, + "description": "fgumi queue-memory budget in MB." + }, + "fgumi_queue_memory_per_thread": { + "type": "boolean", + "default": true, + "description": "Scale fgumi queue-memory by allocated thread count." + }, + "fgumi_compression_level": { + "type": "integer", + "default": 1, + "minimum": 0, + "maximum": 12, + "description": "Compression level for fgumi BAM outputs." + }, + "fgumi_sort_max_memory": { + "type": "string", + "default": "2G", + "pattern": "^\\d+(\\.\\d+)?[KMG]$", + "description": "Memory budget for fgumi sort, for example 2G or 768M." + }, + "fgumi_sort_memory_per_thread": { + "type": "boolean", + "default": true, + "description": "Scale fgumi sort memory by allocated thread count." + }, + "fgumi_snap_ignore_mismatched_pairs": { + "type": "boolean", + "default": true, + "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." + }, + "fgumi_snap_extra_args": { + "type": "string", + "default": "", + "description": "Additional SNAP options for the fgumi fastq|snap|zipper pipeline." 
+ }, + "fgumi_duplex_metrics_extra_args": { + "type": "string", + "default": "", + "description": "Additional fgumi duplex-metrics options." } } }, diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index dfccf2d5..d46961e5 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -6,6 +6,12 @@ // MODULES include { BIOBAMBAM_BAMSORMADUP } from "../../../modules/nf-core/biobambam/bamsormadup/main.nf" +include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" +include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" +include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" +include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" +include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" +include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" include { SAMTOOLS_CONVERT } from "../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -42,12 +48,59 @@ workflow FASTQ_TO_CRAM { } .set { ch_meta_reads_aligner_index_fasta_datatype } - // align fastq files per sample - // ALIGNMENT([meta,fastq], index, sort) + ch_meta_reads_aligner_index_fasta_datatype.dna + .branch { meta, reads, aligner, index, fasta -> + umi: meta.umi_aware == true + return [meta, reads, aligner, index, fasta] + non_umi: true + return [meta, reads, aligner, index, fasta] + } + .set { ch_dna_to_align } + + // Align non-UMI DNA fastq files per sample FASTQ_ALIGN_DNA( - ch_meta_reads_aligner_index_fasta_datatype.dna, + ch_dna_to_align.non_umi, false, ) + + // UMI-aware fgumi branch (steps 1, 3, 4, 5, 6, 7 in fgumi Basic Workflow) + FGUMI_EXTRACT( + ch_dna_to_align.umi + .map { meta, 
reads, _aligner, _index, _fasta -> [meta, reads] } + ) + + FGUMI_SNAP_ZIPPER_SORT( + FGUMI_EXTRACT.out.bam + .join( + ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> + [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] + }, + by: 0, + ) + .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } + ) + + FGUMI_GROUP( + FGUMI_SNAP_ZIPPER_SORT.out.bam + ) + + FGUMI_SIMPLEX( + FGUMI_GROUP.out.bam + ) + + FGUMI_DUPLEX_METRICS( + FGUMI_GROUP.out.bam + ) + + FGUMI_FILTER( + FGUMI_SIMPLEX.out.bam + .join( + ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, + by: 0, + ) + .map { meta, bam, fasta -> [meta, bam, fasta] } + ) + FASTQ_ALIGN_RNA( ch_meta_reads_aligner_index_fasta_datatype.rna ) @@ -100,6 +153,18 @@ workflow FASTQ_TO_CRAM { ch_markdup_index = channel.empty() + // UMI branch outputs are mixed into the common markdup/metrics streams: the filtered consensus BAM and its index join ch_markdup_index, and the fgumi group/simplex/filter metrics join ch_sormadup_metrics. NOTE(review): family_size_histogram is routed twice (mixed into ch_sormadup_metrics and exposed again as ch_family_size_histogram); and if ch_sormadup_metrics feeds the output that main.nf appears to publish under a fixed <samplename>.duplicate_metrics.txt name, the four fgumi metrics files of one sample would target the same published path - verify against workflows/preprocessing.nf. + ch_markdup_index = ch_markdup_index.mix( + FGUMI_FILTER.out.bam.join(FGUMI_FILTER.out.bai, failOnMismatch: true, failOnDuplicate: true) + ) + ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.grouping_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.family_size_histogram) + ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_SIMPLEX.out.consensus_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_FILTER.out.filtering_metrics) + ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics + ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram + ch_filtered_consensus_bam = FGUMI_FILTER.out.bam + // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta.bamsormadup) ch_markdup_index = ch_markdup_index.mix(BIOBAMBAM_BAMSORMADUP.out.bam.join(BIOBAMBAM_BAMSORMADUP.out.bam_index, failOnMismatch: true, failOnDuplicate: true)) @@ -147,10 +212,18 @@ workflow FASTQ_TO_CRAM { .set { ch_cram_crai } 
ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) + // Keep a dedicated channel for UMI-aware sample CRAM outputs. + ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.umi_aware == true } + emit: cram_crai = ch_cram_crai + // UMI-specific output channels for downstream reporting and publishing. + umi_cram_crai = ch_umi_cram_crai + filtered_consensus_bam = ch_filtered_consensus_bam rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions rna_junctions = FASTQ_ALIGN_RNA.out.junctions sormadup_metrics = ch_sormadup_metrics + family_size_histogram = ch_family_size_histogram + duplex_metrics = ch_duplex_metrics align_reports = FASTQ_ALIGN_DNA.out.reports } diff --git a/tests/inputs/fgumi/R1.fastq.gz b/tests/inputs/fgumi/R1.fastq.gz new file mode 100644 index 00000000..3b41ea25 --- /dev/null +++ b/tests/inputs/fgumi/R1.fastq.gz @@ -0,0 +1,4 @@ +@r1 +ACGT ++ +!!!! diff --git a/tests/inputs/fgumi/R2.fastq.gz b/tests/inputs/fgumi/R2.fastq.gz new file mode 100644 index 00000000..c5b0a999 --- /dev/null +++ b/tests/inputs/fgumi/R2.fastq.gz @@ -0,0 +1,4 @@ +@r2 +TGCA ++ +!!!! 
diff --git a/tests/inputs/fgumi/consensus.bam b/tests/inputs/fgumi/consensus.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/consensus.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/grouped.bam b/tests/inputs/fgumi/grouped.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/grouped.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/ref.dict b/tests/inputs/fgumi/ref.dict new file mode 100644 index 00000000..c7a324e0 --- /dev/null +++ b/tests/inputs/fgumi/ref.dict @@ -0,0 +1,2 @@ +@HD VN:1.6 SO:unsorted +@SQ SN:chr1 LN:12 diff --git a/tests/inputs/fgumi/ref.fa b/tests/inputs/fgumi/ref.fa new file mode 100644 index 00000000..41884804 --- /dev/null +++ b/tests/inputs/fgumi/ref.fa @@ -0,0 +1,2 @@ +>chr1 +ACGTACGTACGT diff --git a/tests/inputs/fgumi/ref.fa.fai b/tests/inputs/fgumi/ref.fa.fai new file mode 100644 index 00000000..795162b9 --- /dev/null +++ b/tests/inputs/fgumi/ref.fa.fai @@ -0,0 +1 @@ +chr1 12 6 12 13 diff --git a/tests/inputs/fgumi/snap_index/OverflowTable.txt b/tests/inputs/fgumi/snap_index/OverflowTable.txt new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/snap_index/OverflowTable.txt @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/template.bam b/tests/inputs/fgumi/template.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/template.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/fgumi/unmapped.bam b/tests/inputs/fgumi/unmapped.bam new file mode 100644 index 00000000..48cdce85 --- /dev/null +++ b/tests/inputs/fgumi/unmapped.bam @@ -0,0 +1 @@ +placeholder diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index 3a432b4b..f4a89c9e 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -49,3 +49,18 @@ run_coverage: true fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz 
fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz +# UMI consensus (fgumi) inputs +# Example DNA sample with umi_aware enabled for fgumi processing. +- id: UMI_consensus1 + samplename: HT1080-chr20 + library: test_library + organism: Homo sapiens + tag: WES + sample_type: DNA + aligner: snap + markdup: bamsormadup + umi_aware: true + run_coverage: true + fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz + fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz + diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test new file mode 100644 index 00000000..665fb839 --- /dev/null +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process FGUMI_DUPLEX_METRICS" + script "modules/local/fgumi/duplexmetrics/main.nf" + process "FGUMI_DUPLEX_METRICS" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/duplexmetrics" + + test("test - stub") { + // Stub-mode contract test: verifies duplex-metrics output wiring. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + ] + """ + } + } + + then { + // Keep assertions minimal and snapshot-driven, consistent with other module tests. 
+ assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap new file mode 100644 index 00000000..c2514d51 --- /dev/null +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_DUPLEX_METRICS", + "fgumi", + "0.1.2" + ] + ], + "duplex_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_DUPLEX_METRICS", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:04:44.383973526" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test new file mode 100644 index 00000000..ad1088a5 --- /dev/null +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process FGUMI_EXTRACT" + script "modules/local/fgumi/extract/main.nf" + process "FGUMI_EXTRACT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/extract" + + test("test - stub") { + // Stub-mode contract test: verifies extract output channel and versions tuple. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test", library: "lib1"], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ] + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/extract/main.nf.test.snap b/tests/modules/local/fgumi/extract/main.nf.test.snap new file mode 100644 index 00000000..82b09e4b --- /dev/null +++ b/tests/modules/local/fgumi/extract/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test", + "library": "lib1" + }, + "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test", + "samplename": "test", + "library": "lib1" + }, + "test.fgumi.unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_EXTRACT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T15:59:46.342116682" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test new file mode 100644 index 00000000..9c068a67 --- /dev/null +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process FGUMI_FILTER" + script "modules/local/fgumi/filter/main.nf" + process "FGUMI_FILTER" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/filter" + + test("test - stub") { + // Stub-mode contract test: verifies filtered BAM/index and metrics outputs. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/filter/main.nf.test.snap b/tests/modules/local/fgumi/filter/main.nf.test.snap new file mode 100644 index 00000000..28c1a053 --- /dev/null +++ b/tests/modules/local/fgumi/filter/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.1.2" + ] + ], + "bai": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtering_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_FILTER", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:00.831097499" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/group/main.nf.test b/tests/modules/local/fgumi/group/main.nf.test new file mode 100644 index 00000000..8fb9b79e --- /dev/null +++ b/tests/modules/local/fgumi/group/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_GROUP" + script "modules/local/fgumi/group/main.nf" + process "FGUMI_GROUP" + + tag 
"modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/group" + + test("test - stub") { + // Stub-mode contract test: verifies grouping and family-size outputs. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/group/main.nf.test.snap b/tests/modules/local/fgumi/group/main.nf.test.snap new file mode 100644 index 00000000..d242fb4a --- /dev/null +++ b/tests/modules/local/fgumi/group/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.group.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "test" + }, + "test.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "grouping_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_GROUP", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:08.118138918" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test new file mode 100644 index 00000000..62b9fc5f --- /dev/null +++ 
b/tests/modules/local/fgumi/simplex/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_SIMPLEX" + script "modules/local/fgumi/simplex/main.nf" + process "FGUMI_SIMPLEX" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/simplex" + + test("test - stub") { + // Stub-mode contract test: verifies simplex BAM and consensus metrics. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/simplex/main.nf.test.snap b/tests/modules/local/fgumi/simplex/main.nf.test.snap new file mode 100644 index 00000000..14ecba3d --- /dev/null +++ b/tests/modules/local/fgumi/simplex/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "consensus_metrics": [ + [ + { + "id": "test" + }, + "test.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SIMPLEX", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:15.268242503" + } +} \ No newline at end of file diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test new file mode 100644 index 00000000..e27f388e --- /dev/null +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -0,0 +1,35 @@ 
+nextflow_process { + + name "Test Process FGUMI_SNAP_ZIPPER_SORT" + script "modules/local/fgumi/snapzippersort/main.nf" + process "FGUMI_SNAP_ZIPPER_SORT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/snapzippersort" + + test("test - stub") { + // Stub-mode contract test: verifies template BAM output wiring. + options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test", samplename: "test"], + file("${projectDir}/tests/inputs/fgumi/unmapped.bam", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap b/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap new file mode 100644 index 00000000..d575f1ef --- /dev/null +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "FGUMI_SNAP_ZIPPER_SORT", + "fgumi", + "0.1.2" + ] + ], + "bam": [ + [ + { + "id": "test", + "samplename": "test" + }, + "test.fgumi.template.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SNAP_ZIPPER_SORT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-02T16:05:22.614091027" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test new file mode 100644 index 00000000..5f192e63 --- /dev/null +++ 
b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -0,0 +1,58 @@ +nextflow_workflow { + + name "Test Workflow FASTQ_TO_CRAM UMI fgumi stub" + script "subworkflows/local/fastq_to_aligned_cram/main.nf" + workflow "FASTQ_TO_CRAM" + + tag "subworkflows" + tag "subworkflows/local" + tag "subworkflows/local/fastq_to_aligned_cram" + tag "fgumi" + + test("fastq to cram - fgumi umi-aware - stub") { + // End-to-end UMI branch contract test in stub mode. + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + umi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + [] + ]) + """ + } + } + + then { + assert workflow.success + // Explicitly assert newly exposed UMI channels. 
+ assert workflow.out.duplex_metrics.size() == 1 + assert workflow.out.family_size_histogram.size() == 1 + assert workflow.out.filtered_consensus_bam.size() == 1 + assert workflow.out.umi_cram_crai.size() == 1 + assert snapshot( + sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) + ).match() + } + } +} diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap new file mode 100644 index 00000000..ceffdb37 --- /dev/null +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -0,0 +1,205 @@ +{ + "fastq to cram - fgumi umi-aware - stub": { + "content": [ + { + "align_reports": [ + + ], + "cram_crai": [ + [ + { + "genome_data": { + "dict": "ref.dict", + "fai": "ref.fa.fai", + "fasta": "ref.fa", + "snap": "snap_index" + }, + "id": "UMI_consensus1", + "markdup": "bamsormadup", + "sample_type": "DNA", + "samplename": "HT1080_chr20", + "single_end": false, + "umi_aware": true + }, + "UMI_consensus1.cram", + "UMI_consensus1.cram.crai" + ] + ], + "duplex_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_consensus_bam": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rna_junctions": [ + + ], + "rna_splice_junctions": [ + + ], + "sormadup_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + 
"samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "umi_cram_crai": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "umi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.cram:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-07T14:33:07.208059007" + } +} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test index 65b68c4c..efaaabd4 100644 --- 
a/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/main.nf.test @@ -219,4 +219,50 @@ nextflow_workflow { ) } } + + test("fastq to cram - fgumi umi-aware - stub") { + options "-stub" + when { + workflow { + """ + // [meta, [fq_1,fq_2], aligner, index, fasta] + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + umi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + [] + ]) + """ + } + } + + then { + assertAll( + { + assert workflow.success + assert snapshot( + sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) + ).match() + } + ) + } + } } diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 5337ae93..009317d9 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -269,7 +269,10 @@ workflow PREPROCESSING { FASTQ_TO_CRAM( ch_meta_reads_aligner_index_fasta_gtf ) + // Collect both standard and UMI-specific metrics for MultiQC. 
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.sormadup_metrics) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.duplex_metrics) + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.family_size_histogram) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -430,6 +433,11 @@ workflow PREPROCESSING { rna_junctions = FASTQ_TO_CRAM.out.rna_junctions align_reports = FASTQ_TO_CRAM.out.align_reports sormadup_metrics = FASTQ_TO_CRAM.out.sormadup_metrics + // UMI-specific outputs exposed at workflow level. + family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram + umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam + umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics + umi_crams = FASTQ_TO_CRAM.out.umi_cram_crai mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From bd23c30d4283bb8ec2d35c67e1cc405507550950 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:38:31 +0200 Subject: [PATCH 02/25] Fixed assets. Changed to fgumi_aware parameter. --- assets/schema_input.json | 8 +++++++- assets/schema_sampleinfo.json | 8 +++++++- docs/usage.md | 6 ++++++ subworkflows/local/fastq_to_aligned_cram/main.nf | 4 ++-- tests/inputs/test.yml | 4 ++-- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 6c3931a2..48c52105 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -50,7 +50,13 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Enable UMI-aware processing through the fgumi consensus branch.", + "description": "Run markdup in UMI-aware mode. 
This applies to Samtools only and requires the UMI to be in the read name.", + "default": false + }, + "fgumi_aware": { + "meta": ["fgumi_aware"], + "type": "boolean", + "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, "skip_trimming": { diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json index 15571ba1..aaae3b10 100644 --- a/assets/schema_sampleinfo.json +++ b/assets/schema_sampleinfo.json @@ -90,7 +90,13 @@ "umi_aware": { "meta": ["umi_aware"], "type": "boolean", - "description": "Enable UMI-aware processing through the fgumi consensus branch.", + "description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.", + "default": false + }, + "fgumi_aware": { + "meta": ["fgumi_aware"], + "type": "boolean", + "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, "skip_trimming": { diff --git a/docs/usage.md b/docs/usage.md index f59c809a..7f3055af 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,6 +26,7 @@ A `fastq` samplesheet file consisting of paired-end data may look something like aligner: bwamem markdup: bamsormadup umi_aware: false + fgumi_aware: false skip_trimming: false trim_front: 0 trim_tail: 0 @@ -67,6 +68,10 @@ Following table shows the fields that are used by the `fastq` samplesheet: An [example samplesheet](../tests/inputs/test.yml) has been provided with the pipeline. +> [!NOTE] +> `umi_aware` and `fgumi_aware` are independent options. +> Use `umi_aware` for samtools markdup UMI mode, and `fgumi_aware` to run the fgumi consensus branch. + ### Flowcell samplesheet A `flowcell` samplesheet file consisting of one sequencing run may look something like the one below. 
@@ -102,6 +107,7 @@ A `flowcell` sample info JSON/YML file consisting for one sequencing run may loo aligner: bwamem markdup: bamsormadup umi_aware: false + fgumi_aware: false skip_trimming: false trim_front: 0 trim_tail: 0 diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index d46961e5..0d82c3ef 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -50,7 +50,7 @@ workflow FASTQ_TO_CRAM { ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> - umi: meta.umi_aware == true + umi: (meta.fgumi_aware == true) || (meta.umi_aware == true) return [meta, reads, aligner, index, fasta] non_umi: true return [meta, reads, aligner, index, fasta] @@ -213,7 +213,7 @@ workflow FASTQ_TO_CRAM { ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.umi_aware == true } + ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> (meta.fgumi_aware == true) || (meta.umi_aware == true) } emit: cram_crai = ch_cram_crai diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index f4a89c9e..051ac1ca 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -50,7 +50,7 @@ fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz # UMI consensus (fgumi) inputs -# Example DNA sample with umi_aware enabled for fgumi processing. +# Example DNA sample with fgumi_aware enabled for fgumi processing. 
- id: UMI_consensus1 samplename: HT1080-chr20 library: test_library @@ -59,7 +59,7 @@ sample_type: DNA aligner: snap markdup: bamsormadup - umi_aware: true + fgumi_aware: true run_coverage: true fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz From 1795ab6c9ff5b97a88fdc4439edccde7c577db10 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:42:26 +0200 Subject: [PATCH 03/25] Changed pipeline wide read-structures to meta values --- assets/schema_input.json | 12 ++++++++++++ assets/schema_sampleinfo.json | 12 ++++++++++++ conf/modules.config | 4 ++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 48c52105..ea6d99e5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -59,6 +59,18 @@ "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, + "fgumi_read_structures": { + "meta": ["fgumi_read_structures"], + "type": "string", + "description": "Read structures passed to fgumi extract for this sample.", + "default": null + }, + "fgumi_extract_umis_from_read_names": { + "meta": ["fgumi_extract_umis_from_read_names"], + "type": "boolean", + "description": "Override fgumi extraction from read names for this sample.", + "default": null + }, "skip_trimming": { "meta": ["skip_trimming"], "type": "boolean", diff --git a/assets/schema_sampleinfo.json b/assets/schema_sampleinfo.json index aaae3b10..d19bb1c4 100644 --- a/assets/schema_sampleinfo.json +++ b/assets/schema_sampleinfo.json @@ -99,6 +99,18 @@ "description": "Enable UMI-aware consensus processing through the fgumi branch.", "default": false }, + "fgumi_read_structures": { + "meta": ["fgumi_read_structures"], + "type": "string", + "description": "Read 
structures passed to fgumi extract for this sample.", + "default": null + }, + "fgumi_extract_umis_from_read_names": { + "meta": ["fgumi_extract_umis_from_read_names"], + "type": "boolean", + "description": "Override fgumi extraction from read names for this sample.", + "default": null + }, "skip_trimming": { "meta": ["skip_trimming"], "type": "boolean", diff --git a/conf/modules.config b/conf/modules.config index af3612d1..97b99b28 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,8 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ - "--read-structures ${params.fgumi_read_structures}", - params.fgumi_extract_umis_from_read_names ? "--extract-umis-from-read-names" : "", + "--read-structures ${meta.fgumi_read_structures ?: params.fgumi_read_structures}", + ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : params.fgumi_extract_umis_from_read_names) ? "--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", "--queue-memory ${params.fgumi_queue_memory}", params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", From 9ea17ffe05a1ba51e014a9594ea662975c8d9aef Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:46:36 +0200 Subject: [PATCH 04/25] Added usage of default SNAP parameters in config --- conf/modules.config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 97b99b28..772492ff 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -255,7 +255,14 @@ process { ext.prefix = { "${meta.id}.fgumi" } ext.args = { [ + "-b-", + "-sm 20", params.fgumi_snap_ignore_mismatched_pairs ? "-I" : "", + "-hc-", + "-S id", + "-sa", + "-xf 2", + meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? 
"${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "", "${params.fgumi_snap_extra_args}", ].join(" ").trim() } From f2054802365faede2f336b1e1d331d9a8a7b5635 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 09:52:21 +0200 Subject: [PATCH 05/25] Process memory now handled per nf-core style --- conf/modules.config | 15 +++------------ modules/local/fgumi/group/main.nf | 12 ++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 772492ff..a34e3166 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -297,18 +297,9 @@ process { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } - ext.args = { - [ - "--strategy ${params.fgumi_group_strategy}", - "--edits ${params.fgumi_group_edits}", - "--threads ${task.cpus}", - "--queue-memory ${params.fgumi_queue_memory}", - params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", - "--compression-level ${params.fgumi_compression_level}", - "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", - "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", - ].join(" ").trim() - } + ext.strategy = { params.fgumi_group_strategy } + ext.edits = { params.fgumi_group_edits } + ext.compression_level = { params.fgumi_compression_level } } //// FGUMI simplex (step 5) diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 6779351f..cecc9d5d 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -20,12 +20,24 @@ process FGUMI_GROUP { script: def args = task.ext.args ?: '' + def strategy = task.ext.strategy ?: 'adjacency' + def edits = task.ext.edits != null ? task.ext.edits : 1 + def compression_level = task.ext.compression_level != null ? 
task.ext.compression_level : 1 + def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" """ fgumi group \ --input ${bam} \ --output ${prefix}.bam \ + --strategy ${strategy} \ + --edits ${edits} \ + --threads ${task.cpus} \ + --queue-memory ${queue_memory_mb} \ + --queue-memory-per-thread \ + --compression-level ${compression_level} \ + --grouping-metrics ${prefix}.grouping_metrics.txt \ + --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} """ From b2c80a7266cfac03f39b54d368b46dc29a59e716 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:00:00 +0200 Subject: [PATCH 06/25] Changes to fgumi extract to make it portable (nf-core) --- modules/local/fgumi/extract/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index bd4294fc..b855ef5d 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,8 +19,8 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - def sample_name = meta.samplename ?: meta.id - def library_name = meta.library ?: meta.id + def sample_name = task.ext.sample_name ?: meta.id + def library_name = task.ext.library_name ?: meta.id def input_files = (reads instanceof List ? 
reads : [reads]).collect { read -> "${read}" }.join(' ') """ From ac9c547dc7f7bbffd26f71f03088a0fead5b3fa4 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:02:34 +0200 Subject: [PATCH 07/25] List input instead of own conversion --- modules/local/fgumi/extract/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index b855ef5d..9b341def 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -21,11 +21,10 @@ process FGUMI_EXTRACT { prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" def sample_name = task.ext.sample_name ?: meta.id def library_name = task.ext.library_name ?: meta.id - def input_files = (reads instanceof List ? reads : [reads]).collect { read -> "${read}" }.join(' ') """ fgumi extract \ - --inputs ${input_files} \ + --inputs ${reads} \ --output ${prefix}.bam \ --sample "${sample_name}" \ --library "${library_name}" \ From 05ee64540d5c1e9e06218361e32566ef57fd3ae0 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:09:26 +0200 Subject: [PATCH 08/25] Split FGUMI_FILTER into separate filter and sort module and updated tests --- conf/modules.config | 9 ++- modules/local/fgumi/filter/main.nf | 14 +---- modules/local/fgumi/sort/main.nf | 39 ++++++++++++ .../local/fastq_to_aligned_cram/main.nf | 9 ++- tests/modules/local/fgumi/filter/main.nf.test | 2 +- .../local/fgumi/filter/main.nf.test.snap | 24 ++------ tests/modules/local/fgumi/sort/main.nf.test | 32 ++++++++++ .../local/fgumi/sort/main.nf.test.snap | 59 +++++++++++++++++++ 8 files changed, 152 insertions(+), 36 deletions(-) create mode 100644 modules/local/fgumi/sort/main.nf create mode 100644 tests/modules/local/fgumi/sort/main.nf.test create mode 100644 tests/modules/local/fgumi/sort/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index a34e3166..dc512604 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -338,7 +338,14 @@ process { "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", ].join(" ").trim() } - ext.args2 = { + } + + //// FGUMI coordinate sort/index after filter (step 7) + withName: '.*FASTQ_TO_CRAM:FGUMI_SORT' { + cpus = 8 + memory = 32.GB + ext.prefix = { "${meta.id}.fgumi.filter" } + ext.args = { [ "--threads ${task.cpus}", "--max-memory ${params.fgumi_sort_max_memory}", diff --git a/modules/local/fgumi/filter/main.nf b/modules/local/fgumi/filter/main.nf index c35bd7cc..b63752da 100644 --- a/modules/local/fgumi/filter/main.nf +++ b/modules/local/fgumi/filter/main.nf @@ -10,8 +10,7 @@ process FGUMI_FILTER { tuple val(meta), path(bam), path(fasta) output: - tuple val(meta), path("${prefix}.bam"), emit: bam - tuple val(meta), path("${prefix}.bam.bai"), emit: bai + tuple val(meta), path("${prefix}.filtered.bam"), emit: bam tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi @@ -20,7 +19,6 @@ process FGUMI_FILTER { script: def args = task.ext.args ?: '' - def sort_args = task.ext.args2 ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" """ @@ -29,20 +27,12 @@ process FGUMI_FILTER { --output ${prefix}.filtered.bam \ --ref ${fasta} \ ${args} - - fgumi sort \ - --input ${prefix}.filtered.bam \ - --output ${prefix}.bam \ - --order coordinate \ - --write-index \ - ${sort_args} """ stub: prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" """ - touch ${prefix}.bam - touch ${prefix}.bam.bai + touch ${prefix}.filtered.bam touch ${prefix}.filtering_metrics.txt """ } diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf new file mode 100644 index 00000000..a1b8de7c --- /dev/null +++ b/modules/local/fgumi/sort/main.nf @@ -0,0 +1,39 @@ +process FGUMI_SORT { + tag "$meta.id" + label 'process_medium' + + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' + : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + tuple val(meta), path("${prefix}.bam.bai"), emit: bai + tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + + """ + fgumi sort \ + --input ${bam} \ + --output ${prefix}.bam \ + --order coordinate \ + --write-index \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + """ +} diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 0d82c3ef..46f4b843 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -12,6 +12,7 @@ include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" +include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" include { SAMTOOLS_CONVERT } from "../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -101,6 +102,10 @@ workflow FASTQ_TO_CRAM { .map { meta, 
bam, fasta -> [meta, bam, fasta] } ) + FGUMI_SORT( + FGUMI_FILTER.out.bam + ) + FASTQ_ALIGN_RNA( ch_meta_reads_aligner_index_fasta_datatype.rna ) @@ -155,7 +160,7 @@ workflow FASTQ_TO_CRAM { // UMI branch outputs are mixed into the common markdup/metrics streams. ch_markdup_index = ch_markdup_index.mix( - FGUMI_FILTER.out.bam.join(FGUMI_FILTER.out.bai, failOnMismatch: true, failOnDuplicate: true) + FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) ) ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.grouping_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.family_size_histogram) @@ -163,7 +168,7 @@ workflow FASTQ_TO_CRAM { ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_FILTER.out.filtering_metrics) ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram - ch_filtered_consensus_bam = FGUMI_FILTER.out.bam + ch_filtered_consensus_bam = FGUMI_SORT.out.bam // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta.bamsormadup) diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index 9c068a67..bec1f2b3 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "modules/local/fgumi/filter" test("test - stub") { - // Stub-mode contract test: verifies filtered BAM/index and metrics outputs. + // Stub-mode contract test: verifies filtered BAM and metrics outputs. 
options "-stub" when { diff --git a/tests/modules/local/fgumi/filter/main.nf.test.snap b/tests/modules/local/fgumi/filter/main.nf.test.snap index 28c1a053..7f59c001 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test.snap +++ b/tests/modules/local/fgumi/filter/main.nf.test.snap @@ -7,18 +7,10 @@ { "id": "test" }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ [ { "id": "test" @@ -26,27 +18,19 @@ "test.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "2": [ [ "FGUMI_FILTER", "fgumi", "0.1.2" ] ], - "bai": [ - [ - { - "id": "test" - }, - "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "bam": [ [ { "id": "test" }, - "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.fgumi.filter.filtered.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "filtering_metrics": [ @@ -70,6 +54,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-02T16:05:00.831097499" + "timestamp": "2026-04-08T10:07:36.721154372" } } \ No newline at end of file diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test new file mode 100644 index 00000000..7879f414 --- /dev/null +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process FGUMI_SORT" + script "modules/local/fgumi/sort/main.nf" + process "FGUMI_SORT" + + tag "modules" + tag "modules/local" + tag "modules/local/fgumi" + tag "modules/local/fgumi/sort" + + test("test - stub") { + // Stub-mode contract test: verifies coordinate-sorted BAM/index outputs. 
+ options "-stub" + + when { + process { + """ + input[0] = [ + [id: "test"], + file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + } +} diff --git a/tests/modules/local/fgumi/sort/main.nf.test.snap b/tests/modules/local/fgumi/sort/main.nf.test.snap new file mode 100644 index 00000000..c597cb17 --- /dev/null +++ b/tests/modules/local/fgumi/sort/main.nf.test.snap @@ -0,0 +1,59 @@ +{ + "test - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FGUMI_SORT", + "fgumi", + "0.1.2" + ] + ], + "bai": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test" + }, + "test.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fgumi": [ + [ + "FGUMI_SORT", + "fgumi", + "0.1.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-04-08T10:07:49.850827528" + } +} \ No newline at end of file From cfdaac0981780d298957d4b1fc3c5cf1ea217a7b Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:12:16 +0200 Subject: [PATCH 09/25] decoupled umi_aware and fgumi_aware --- .../local/fastq_to_aligned_cram/main.nf | 4 ++-- .../fgumi_umi_stub.nf.test | 2 +- .../fgumi_umi_stub.nf.test.snap | 22 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 46f4b843..32e73584 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -51,7 +51,7 @@ workflow FASTQ_TO_CRAM { 
ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> - umi: (meta.fgumi_aware == true) || (meta.umi_aware == true) + umi: meta.fgumi_aware == true return [meta, reads, aligner, index, fasta] non_umi: true return [meta, reads, aligner, index, fasta] @@ -218,7 +218,7 @@ workflow FASTQ_TO_CRAM { ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> (meta.fgumi_aware == true) || (meta.umi_aware == true) } + ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } emit: cram_crai = ch_cram_crai diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 5f192e63..8a724b0d 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -22,7 +22,7 @@ nextflow_workflow { single_end: false, sample_type: "DNA", markdup: "bamsormadup", - umi_aware: true, + fgumi_aware: true, genome_data: [ fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index ceffdb37..491cd976 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -8,6 +8,7 @@ "cram_crai": [ [ { + "fgumi_aware": true, "genome_data": { "dict": "ref.dict", "fai": "ref.fa.fai", @@ -18,8 +19,7 @@ "markdup": "bamsormadup", "sample_type": "DNA", "samplename": "HT1080_chr20", - "single_end": false, - "umi_aware": true + "single_end": false }, "UMI_consensus1.cram", "UMI_consensus1.cram.crai" 
@@ -33,7 +33,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -54,7 +54,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -75,7 +75,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -102,7 +102,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -121,7 +121,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -140,7 +140,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -159,7 +159,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -180,7 +180,7 @@ "single_end": false, "sample_type": "DNA", "markdup": "bamsormadup", - "umi_aware": true, + "fgumi_aware": true, "genome_data": { 
"fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", @@ -200,6 +200,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-07T14:33:07.208059007" + "timestamp": "2026-04-08T10:11:21.73767029" } } \ No newline at end of file From 31f924d424c92573da3d3f784c81ffeba39495eb Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:18:02 +0200 Subject: [PATCH 10/25] Refactor: fgumi modules to dedicated umi_consensus subworkflow --- .../local/fastq_to_aligned_cram/main.nf | 63 +--- subworkflows/local/umi_consensus/main.nf | 69 ++++ .../local/umi_consensus/main.nf.test | 56 ++++ .../local/umi_consensus/main.nf.test.snap | 309 ++++++++++++++++++ 4 files changed, 444 insertions(+), 53 deletions(-) create mode 100644 subworkflows/local/umi_consensus/main.nf create mode 100644 tests/subworkflows/local/umi_consensus/main.nf.test create mode 100644 tests/subworkflows/local/umi_consensus/main.nf.test.snap diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 32e73584..07483426 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -6,13 +6,6 @@ // MODULES include { BIOBAMBAM_BAMSORMADUP } from "../../../modules/nf-core/biobambam/bamsormadup/main.nf" -include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" -include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" -include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" -include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" -include { FGUMI_SIMPLEX } from "../../../modules/local/fgumi/simplex/main.nf" -include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" -include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" include { SAMTOOLS_CONVERT } from 
"../../../modules/nf-core/samtools/convert/main" include { SAMTOOLS_SORMADUP } from "../../../modules/nf-core/samtools/sormadup/main.nf" include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/main" @@ -20,6 +13,7 @@ include { SAMTOOLS_SORT } from "../../../modules/nf-core/samtools/sort/m // SUBWORKFLOWS include { FASTQ_ALIGN_DNA } from '../../nf-core/fastq_align_dna/main' include { FASTQ_ALIGN_RNA } from '../../local/fastq_align_rna/main' +include { UMI_CONSENSUS_FGUMI } from '../../local/umi_consensus/main.nf' // FUNCTIONS include { getGenomeAttribute } from '../../local/utils_nfcore_preprocessing_pipeline' @@ -65,45 +59,8 @@ workflow FASTQ_TO_CRAM { ) // UMI-aware fgumi branch (steps 1, 3, 4, 5, 6, 7 in fgumi Basic Workflow) - FGUMI_EXTRACT( + UMI_CONSENSUS_FGUMI( ch_dna_to_align.umi - .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } - ) - - FGUMI_SNAP_ZIPPER_SORT( - FGUMI_EXTRACT.out.bam - .join( - ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> - [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] - }, - by: 0, - ) - .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } - ) - - FGUMI_GROUP( - FGUMI_SNAP_ZIPPER_SORT.out.bam - ) - - FGUMI_SIMPLEX( - FGUMI_GROUP.out.bam - ) - - FGUMI_DUPLEX_METRICS( - FGUMI_GROUP.out.bam - ) - - FGUMI_FILTER( - FGUMI_SIMPLEX.out.bam - .join( - ch_dna_to_align.umi.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, - by: 0, - ) - .map { meta, bam, fasta -> [meta, bam, fasta] } - ) - - FGUMI_SORT( - FGUMI_FILTER.out.bam ) FASTQ_ALIGN_RNA( @@ -160,15 +117,15 @@ workflow FASTQ_TO_CRAM { // UMI branch outputs are mixed into the common markdup/metrics streams. 
ch_markdup_index = ch_markdup_index.mix( - FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) + UMI_CONSENSUS_FGUMI.out.bam_bai ) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.grouping_metrics) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_GROUP.out.family_size_histogram) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_SIMPLEX.out.consensus_metrics) - ch_sormadup_metrics = ch_sormadup_metrics.mix(FGUMI_FILTER.out.filtering_metrics) - ch_duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics - ch_family_size_histogram = FGUMI_GROUP.out.family_size_histogram - ch_filtered_consensus_bam = FGUMI_SORT.out.bam + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.grouping_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.family_size_histogram) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.consensus_metrics) + ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) + ch_duplex_metrics = UMI_CONSENSUS_FGUMI.out.duplex_metrics + ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram + ch_filtered_consensus_bam = UMI_CONSENSUS_FGUMI.out.filtered_consensus_bam // BIOBAMBAM_BAMSORMADUP([meta, [bam, bam]], fasta, fai) BIOBAMBAM_BAMSORMADUP(ch_bam_fasta.bamsormadup) diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf new file mode 100644 index 00000000..a29d1920 --- /dev/null +++ b/subworkflows/local/umi_consensus/main.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +// MODULES +include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" +include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" +include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" +include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" +include { FGUMI_SIMPLEX } from 
"../../../modules/local/fgumi/simplex/main.nf" +include { FGUMI_SNAP_ZIPPER_SORT } from "../../../modules/local/fgumi/snapzippersort/main.nf" +include { FGUMI_SORT } from "../../../modules/local/fgumi/sort/main.nf" + +// FUNCTIONS +include { getGenomeAttribute } from '../../local/utils_nfcore_preprocessing_pipeline' + +workflow UMI_CONSENSUS_FGUMI { + take: + ch_meta_reads_aligner_index_fasta // channel: [mandatory] [meta, reads, aligner, index, fasta] + + main: + FGUMI_EXTRACT( + ch_meta_reads_aligner_index_fasta + .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } + ) + + FGUMI_SNAP_ZIPPER_SORT( + FGUMI_EXTRACT.out.bam + .join( + ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> + [meta, getGenomeAttribute(meta.genome_data, 'snap'), fasta, getGenomeAttribute(meta.genome_data, 'dict')] + }, + by: 0, + ) + .map { meta, unmapped_bam, snap_index, fasta, dict -> [meta, unmapped_bam, snap_index, fasta, dict] } + ) + + FGUMI_GROUP( + FGUMI_SNAP_ZIPPER_SORT.out.bam + ) + + FGUMI_SIMPLEX( + FGUMI_GROUP.out.bam + ) + + FGUMI_DUPLEX_METRICS( + FGUMI_GROUP.out.bam + ) + + FGUMI_FILTER( + FGUMI_SIMPLEX.out.bam + .join( + ch_meta_reads_aligner_index_fasta.map { meta, _reads, _aligner, _index, fasta -> [meta, fasta] }, + by: 0, + ) + .map { meta, bam, fasta -> [meta, bam, fasta] } + ) + + FGUMI_SORT( + FGUMI_FILTER.out.bam + ) + + emit: + bam_bai = FGUMI_SORT.out.bam.join(FGUMI_SORT.out.bai, failOnMismatch: true, failOnDuplicate: true) + grouping_metrics = FGUMI_GROUP.out.grouping_metrics + family_size_histogram = FGUMI_GROUP.out.family_size_histogram + consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics + filtering_metrics = FGUMI_FILTER.out.filtering_metrics + duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics + filtered_consensus_bam = FGUMI_SORT.out.bam +} diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test new file mode 100644 index 00000000..a365bb28 
--- /dev/null +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -0,0 +1,56 @@ +nextflow_workflow { + + name "Test Workflow UMI_CONSENSUS_FGUMI" + script "subworkflows/local/umi_consensus/main.nf" + workflow "UMI_CONSENSUS_FGUMI" + + tag "subworkflows" + tag "subworkflows/local" + tag "subworkflows/local/umi_consensus" + tag "fgumi" + + test("umi consensus fgumi - stub") { + options "-stub" + when { + workflow { + """ + input[0] = Channel.of([ + [ + id: "UMI_consensus1", + samplename: "HT1080_chr20", + single_end: false, + sample_type: "DNA", + markdup: "bamsormadup", + fgumi_aware: true, + genome_data: [ + fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", + fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", + dict: "${projectDir}/tests/inputs/fgumi/ref.dict", + snap: "${projectDir}/tests/inputs/fgumi/snap_index" + ] + ], + [ + file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + ], + "snap", + file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), + file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + ]) + """ + } + } + + then { + assert workflow.success + assert workflow.out.bam_bai.size() == 1 + assert workflow.out.grouping_metrics.size() == 1 + assert workflow.out.family_size_histogram.size() == 1 + assert workflow.out.consensus_metrics.size() == 1 + assert workflow.out.filtering_metrics.size() == 1 + assert workflow.out.duplex_metrics.size() == 1 + assert workflow.out.filtered_consensus_bam.size() == 1 + assert snapshot(workflow.out).match() + } + } +} diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap new file mode 100644 index 00000000..9fc3df12 --- /dev/null +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -0,0 +1,309 @@ +{ + "umi consensus fgumi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "UMI_consensus1", 
+ "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_bai": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + 
"markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "UMI_consensus1.fgumi.filter.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "consensus_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "duplex_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "family_size_histogram": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": 
"ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.family_size_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered_consensus_bam": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtering_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.filter.filtering_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "grouping_metrics": [ + [ + { + "id": "UMI_consensus1", + "samplename": "HT1080_chr20", + "single_end": false, + "sample_type": "DNA", + "markdup": "bamsormadup", + "fgumi_aware": true, + "genome_data": { + "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", + "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", + "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", + "snap": [ + "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" + ] + } + }, + "UMI_consensus1.fgumi.group.grouping_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + 
"timestamp": "2026-04-08T10:16:16.364166786" + } +} \ No newline at end of file From 3b30b71f8eecf38616a277923bda68236469ff82 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:30:48 +0200 Subject: [PATCH 11/25] removed output channel --- .../local/fastq_to_aligned_cram/main.nf | 4 ---- .../fgumi_umi_stub.nf.test | 1 - .../fgumi_umi_stub.nf.test.snap | 24 +------------------ workflows/preprocessing.nf | 2 +- 4 files changed, 2 insertions(+), 29 deletions(-) diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 07483426..0eeffa05 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -174,13 +174,9 @@ workflow FASTQ_TO_CRAM { .set { ch_cram_crai } ch_cram_crai.dump(tag: "FASTQ_TO_CRAM: cram and crai", pretty: true) - // Keep a dedicated channel for UMI-aware sample CRAM outputs. - ch_umi_cram_crai = ch_cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } - emit: cram_crai = ch_cram_crai // UMI-specific output channels for downstream reporting and publishing. 
- umi_cram_crai = ch_umi_cram_crai filtered_consensus_bam = ch_filtered_consensus_bam rna_splice_junctions = FASTQ_ALIGN_RNA.out.splice_junctions rna_junctions = FASTQ_ALIGN_RNA.out.junctions diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 8a724b0d..405fb1d7 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -49,7 +49,6 @@ nextflow_workflow { assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 - assert workflow.out.umi_cram_crai.size() == 1 assert snapshot( sanitizeOutput(workflow.out, unstableKeys:["cram_crai"]) ).match() diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index 491cd976..d699ff72 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -171,28 +171,6 @@ }, "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "umi_cram_crai": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.cram:md5,d41d8cd98f00b204e9800998ecf8427e", - "UMI_consensus1.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" - ] ] } ], @@ -200,6 +178,6 @@ "nf-test": "0.9.2", "nextflow": 
"25.10.4" }, - "timestamp": "2026-04-08T10:11:21.73767029" + "timestamp": "2026-04-08T10:29:40.567401171" } } \ No newline at end of file diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 009317d9..6427b9ab 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -437,7 +437,7 @@ workflow PREPROCESSING { family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics - umi_crams = FASTQ_TO_CRAM.out.umi_cram_crai + umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 289d9357154102814e3fda98c4b60976145ba50a Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:36:30 +0200 Subject: [PATCH 12/25] Changed tests to use template.bam and removed grouped.bam --- tests/inputs/fgumi/grouped.bam | 1 - tests/modules/local/fgumi/duplexmetrics/main.nf.test | 2 +- tests/modules/local/fgumi/simplex/main.nf.test | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 tests/inputs/fgumi/grouped.bam diff --git a/tests/inputs/fgumi/grouped.bam b/tests/inputs/fgumi/grouped.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/grouped.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test index 665fb839..2266eaa9 100644 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test +++ b/tests/modules/local/fgumi/duplexmetrics/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) ] """ } 
diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test index 62b9fc5f..f30cbe76 100644 --- a/tests/modules/local/fgumi/simplex/main.nf.test +++ b/tests/modules/local/fgumi/simplex/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/grouped.bam", checkIfExists: true) + file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) ] """ } From 53de497fbe23e07485f6eb569c2e2b6da774f672 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:41:17 +0200 Subject: [PATCH 13/25] Changed some default resource requirements. Needs benchmarking though --- conf/modules.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dc512604..01f710f7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -233,8 +233,8 @@ process { //// FGUMI extract (step 1) withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' { - cpus = 8 - memory = 32.GB + cpus = 4 + memory = 16.GB ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ @@ -329,8 +329,8 @@ process { //// FGUMI filter + coordinate sort/index (step 7) withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { - cpus = 8 - memory = 32.GB + cpus = 4 + memory = 16.GB ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ From 53549c22bfaf844d18c7378626964ef9a9a6629a Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Wed, 8 Apr 2026 10:50:54 +0200 Subject: [PATCH 14/25] Added some comments + fixed fgumi simplex parameter bug --- conf/modules.config | 14 +++++++------- modules/local/fgumi/extract/main.nf | 1 + modules/local/fgumi/group/main.nf | 1 + subworkflows/local/fastq_to_aligned_cram/main.nf | 1 + subworkflows/local/umi_consensus/main.nf | 3 +++ 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 01f710f7..1c6dbd87 100644 --- a/conf/modules.config +++
b/conf/modules.config @@ -232,7 +232,7 @@ process { } //// FGUMI extract (step 1) - withName: '.*FASTQ_TO_CRAM:FGUMI_EXTRACT' { + withName: '.*FGUMI_EXTRACT' { cpus = 4 memory = 16.GB ext.prefix = { "${meta.id}.fgumi.unmapped" } @@ -249,7 +249,7 @@ process { } //// FGUMI fastq | SNAP | zipper | template sort (step 3) - withName: '.*FASTQ_TO_CRAM:FGUMI_SNAP_ZIPPER_SORT' { + withName: '.*FGUMI_SNAP_ZIPPER_SORT' { cpus = 16 memory = 64.GB ext.prefix = { "${meta.id}.fgumi" } @@ -293,7 +293,7 @@ process { } //// FGUMI group (step 4) - withName: '.*FASTQ_TO_CRAM:FGUMI_GROUP' { + withName: '.*FGUMI_GROUP' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } @@ -303,7 +303,7 @@ process { } //// FGUMI simplex (step 5) - withName: '.*FASTQ_TO_CRAM:FGUMI_SIMPLEX' { + withName: '.*FGUMI_SIMPLEX' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.simplex" } @@ -320,7 +320,7 @@ process { } //// FGUMI duplex-metrics (step 6) - withName: '.*FASTQ_TO_CRAM:FGUMI_DUPLEX_METRICS' { + withName: '.*FGUMI_DUPLEX_METRICS' { cpus = 2 memory = 8.GB ext.prefix = { "${meta.id}.fgumi" } @@ -328,7 +328,7 @@ process { } //// FGUMI filter + coordinate sort/index (step 7) - withName: '.*FASTQ_TO_CRAM:FGUMI_FILTER' { + withName: '.*FGUMI_FILTER' { cpus = 4 memory = 16.GB ext.prefix = { "${meta.id}.fgumi.filter" } @@ -341,7 +341,7 @@ process { } //// FGUMI coordinate sort/index after filter (step 7) - withName: '.*FASTQ_TO_CRAM:FGUMI_SORT' { + withName: '.*FGUMI_SORT' { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.filter" } diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index 9b341def..996c198e 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,6 +19,7 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" + // Keep module portable: only meta.id is assumed, with optional task.ext overrides. 
def sample_name = task.ext.sample_name ?: meta.id def library_name = task.ext.library_name ?: meta.id diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index cecc9d5d..9460ff86 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -23,6 +23,7 @@ process FGUMI_GROUP { def strategy = task.ext.strategy ?: 'adjacency' def edits = task.ext.edits != null ? task.ext.edits : 1 def compression_level = task.ext.compression_level != null ? task.ext.compression_level : 1 + // Derive per-thread queue memory from requested process resources. def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index 0eeffa05..c4d353c9 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -45,6 +45,7 @@ workflow FASTQ_TO_CRAM { ch_meta_reads_aligner_index_fasta_datatype.dna .branch { meta, reads, aligner, index, fasta -> + // fgumi consensus is opt-in via fgumi_aware to avoid changing samtools umi_aware semantics. umi: meta.fgumi_aware == true return [meta, reads, aligner, index, fasta] non_umi: true diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index a29d1920..4cf602ab 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -17,11 +17,13 @@ workflow UMI_CONSENSUS_FGUMI { ch_meta_reads_aligner_index_fasta // channel: [mandatory] [meta, reads, aligner, index, fasta] main: + // Step 1: build an unmapped BAM with UMI tags from input FASTQ. FGUMI_EXTRACT( ch_meta_reads_aligner_index_fasta .map { meta, reads, _aligner, _index, _fasta -> [meta, reads] } ) + // Step 3: align with SNAP, zipper tags back, then template-coordinate sort. 
FGUMI_SNAP_ZIPPER_SORT( FGUMI_EXTRACT.out.bam .join( @@ -45,6 +47,7 @@ workflow UMI_CONSENSUS_FGUMI { FGUMI_GROUP.out.bam ) + // Step 7: filter consensus reads, then coordinate-sort/index for downstream CRAM conversion. FGUMI_FILTER( FGUMI_SIMPLEX.out.bam .join( From 8af63a61aa69f4aecc54150f103461af1d88b4fd Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 09:43:03 +0200 Subject: [PATCH 15/25] Dropped params --- conf/modules.config | 7 +++---- nextflow.config | 4 ---- nextflow_schema.json | 20 -------------------- 3 files changed, 3 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1c6dbd87..9a0a6703 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,8 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ - "--read-structures ${meta.fgumi_read_structures ?: params.fgumi_read_structures}", - ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : params.fgumi_extract_umis_from_read_names) ? "--extract-umis-from-read-names" : ""), + "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", + ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? "--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", "--queue-memory ${params.fgumi_queue_memory}", params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", @@ -263,7 +263,6 @@ process { "-sa", "-xf 2", meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? 
"${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "", - "${params.fgumi_snap_extra_args}", ].join(" ").trim() } ext.args2 = { @@ -334,7 +333,7 @@ process { ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ - "--min-reads ${params.fgumi_filter_min_reads}", + "--min-reads 1,1,1", "--stats ${meta.id}.fgumi.filter.filtering_metrics.txt", ].join(" ").trim() } diff --git a/nextflow.config b/nextflow.config index 60eecb88..93cf3934 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,19 +23,15 @@ params { genelists = null // UMI consensus (fgumi) options - fgumi_read_structures = '+T +T' - fgumi_extract_umis_from_read_names = true fgumi_group_strategy = 'adjacency' fgumi_group_edits = 1 fgumi_simplex_min_reads = 1 - fgumi_filter_min_reads = '1,1,1' fgumi_queue_memory = 768 fgumi_queue_memory_per_thread= true fgumi_compression_level = 1 fgumi_sort_max_memory = '2G' fgumi_sort_memory_per_thread = true fgumi_snap_ignore_mismatched_pairs = true - fgumi_snap_extra_args = '' fgumi_duplex_metrics_extra_args = '' // MultiQC options diff --git a/nextflow_schema.json b/nextflow_schema.json index ad538cad..386430c1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -73,16 +73,6 @@ "format": "directory-path", "description": "Directory containing gene list bed files for granular coverage analysis" }, - "fgumi_read_structures": { - "type": "string", - "default": "+T +T", - "description": "Read structures passed to fgumi extract for UMI extraction from FASTQ." - }, - "fgumi_extract_umis_from_read_names": { - "type": "boolean", - "default": true, - "description": "Enable fgumi extract --extract-umis-from-read-names for read name encoded UMIs." - }, "fgumi_group_strategy": { "type": "string", "default": "adjacency", @@ -101,11 +91,6 @@ "minimum": 1, "description": "Minimum number of reads required per UMI family for fgumi simplex consensus generation." 
}, - "fgumi_filter_min_reads": { - "type": "string", - "default": "1,1,1", - "description": "Minimum reads triplet passed to fgumi filter --min-reads." - }, "fgumi_queue_memory": { "type": "integer", "default": 768, @@ -140,11 +125,6 @@ "default": true, "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." }, - "fgumi_snap_extra_args": { - "type": "string", - "default": "", - "description": "Additional SNAP options for the fgumi fastq|snap|zipper pipeline." - }, "fgumi_duplex_metrics_extra_args": { "type": "string", "default": "", From 0240c8b560443e91bb2c0da0745f3c9a8cf27b99 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 09:49:51 +0200 Subject: [PATCH 16/25] Fixed ext.args --- conf/modules.config | 12 +++++++++--- modules/local/fgumi/extract/main.nf | 5 ----- modules/local/fgumi/group/main.nf | 6 ------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9a0a6703..f3f9a91e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -238,6 +238,8 @@ process { ext.prefix = { "${meta.id}.fgumi.unmapped" } ext.args = { [ + "--sample \"${meta.id}\"", + "--library \"${meta.library ?: meta.id}\"", "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? 
"--extract-umis-from-read-names" : ""), "--threads ${task.cpus}", @@ -296,9 +298,13 @@ process { cpus = 8 memory = 32.GB ext.prefix = { "${meta.id}.fgumi.group" } - ext.strategy = { params.fgumi_group_strategy } - ext.edits = { params.fgumi_group_edits } - ext.compression_level = { params.fgumi_compression_level } + ext.args = { + [ + "--strategy ${params.fgumi_group_strategy}", + "--edits ${params.fgumi_group_edits}", + "--compression-level ${params.fgumi_compression_level}", + ].join(" ").trim() + } } //// FGUMI simplex (step 5) diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index 996c198e..ffabc4b4 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -19,16 +19,11 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" - // Keep module portable: only meta.id is assumed, with optional task.ext overrides. - def sample_name = task.ext.sample_name ?: meta.id - def library_name = task.ext.library_name ?: meta.id """ fgumi extract \ --inputs ${reads} \ --output ${prefix}.bam \ - --sample "${sample_name}" \ - --library "${library_name}" \ ${args} """ diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 9460ff86..52040a12 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -20,9 +20,6 @@ process FGUMI_GROUP { script: def args = task.ext.args ?: '' - def strategy = task.ext.strategy ?: 'adjacency' - def edits = task.ext.edits != null ? task.ext.edits : 1 - def compression_level = task.ext.compression_level != null ? task.ext.compression_level : 1 // Derive per-thread queue memory from requested process resources. 
def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.group" @@ -31,12 +28,9 @@ process FGUMI_GROUP { fgumi group \ --input ${bam} \ --output ${prefix}.bam \ - --strategy ${strategy} \ - --edits ${edits} \ --threads ${task.cpus} \ --queue-memory ${queue_memory_mb} \ --queue-memory-per-thread \ - --compression-level ${compression_level} \ --grouping-metrics ${prefix}.grouping_metrics.txt \ --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} From fde7ed8b2f3a551c70859e86331303c25fed195d Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:08:47 +0200 Subject: [PATCH 17/25] Moved hard coded options --- conf/modules.config | 4 ++++ modules/local/fgumi/group/main.nf | 2 -- modules/local/fgumi/sort/main.nf | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f3f9a91e..d4afc36f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -303,6 +303,8 @@ process { "--strategy ${params.fgumi_group_strategy}", "--edits ${params.fgumi_group_edits}", "--compression-level ${params.fgumi_compression_level}", + "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", + "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", ].join(" ").trim() } } @@ -352,6 +354,8 @@ process { ext.prefix = { "${meta.id}.fgumi.filter" } ext.args = { [ + "--order coordinate", + "--write-index", "--threads ${task.cpus}", "--max-memory ${params.fgumi_sort_max_memory}", "--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 
'true' : 'false'}", diff --git a/modules/local/fgumi/group/main.nf b/modules/local/fgumi/group/main.nf index 52040a12..d079db7f 100644 --- a/modules/local/fgumi/group/main.nf +++ b/modules/local/fgumi/group/main.nf @@ -31,8 +31,6 @@ process FGUMI_GROUP { --threads ${task.cpus} \ --queue-memory ${queue_memory_mb} \ --queue-memory-per-thread \ - --grouping-metrics ${prefix}.grouping_metrics.txt \ - --family-size-histogram ${prefix}.family_size_histogram.txt \ ${args} """ diff --git a/modules/local/fgumi/sort/main.nf b/modules/local/fgumi/sort/main.nf index a1b8de7c..abfbf633 100644 --- a/modules/local/fgumi/sort/main.nf +++ b/modules/local/fgumi/sort/main.nf @@ -25,8 +25,6 @@ process FGUMI_SORT { fgumi sort \ --input ${bam} \ --output ${prefix}.bam \ - --order coordinate \ - --write-index \ ${args} """ From a6691fbfdb5e3df024dbbd187286bc036be4550a Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:22:06 +0200 Subject: [PATCH 18/25] Removed duplex metrics from pipeline --- conf/modules.config | 8 ---- main.nf | 6 --- modules/local/fgumi/duplexmetrics/main.nf | 35 --------------- nextflow.config | 1 - nextflow_schema.json | 5 --- .../local/fastq_to_aligned_cram/main.nf | 2 - subworkflows/local/umi_consensus/main.nf | 6 --- .../local/fgumi/duplexmetrics/main.nf.test | 33 -------------- .../fgumi/duplexmetrics/main.nf.test.snap | 43 ------------------ .../fgumi_umi_stub.nf.test | 1 - .../fgumi_umi_stub.nf.test.snap | 23 +--------- .../local/umi_consensus/main.nf.test | 1 - .../local/umi_consensus/main.nf.test.snap | 44 +------------------ workflows/preprocessing.nf | 2 - 14 files changed, 2 insertions(+), 208 deletions(-) delete mode 100644 modules/local/fgumi/duplexmetrics/main.nf delete mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test delete mode 100644 tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index d4afc36f..88b565c2 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -326,14 +326,6 @@ process { } } - //// FGUMI duplex-metrics (step 6) - withName: '.*FGUMI_DUPLEX_METRICS' { - cpus = 2 - memory = 8.GB - ext.prefix = { "${meta.id}.fgumi" } - ext.args = "${params.fgumi_duplex_metrics_extra_args}" - } - //// FGUMI filter + coordinate sort/index (step 7) withName: '.*FGUMI_FILTER' { cpus = 4 diff --git a/main.nf b/main.nf index e0c1f1fa..a5c6c7fa 100644 --- a/main.nf +++ b/main.nf @@ -82,7 +82,6 @@ workflow { // Additional UMI consensus outputs. family_size_histogram = PREPROCESSING.out.family_size_histogram umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam - umi_duplex_metrics = PREPROCESSING.out.umi_duplex_metrics umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary @@ -192,11 +191,6 @@ output { bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") } } - umi_duplex_metrics { - path { meta, _file -> - return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") - } - } umi_crams { path { meta, cram, crai -> cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") diff --git a/modules/local/fgumi/duplexmetrics/main.nf b/modules/local/fgumi/duplexmetrics/main.nf deleted file mode 100644 index 14954864..00000000 --- a/modules/local/fgumi/duplexmetrics/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process FGUMI_DUPLEX_METRICS { - tag "$meta.id" - label 'process_low' - - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data' - : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${prefix}.duplex_metrics*"), emit: duplex_metrics - tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - - """ - fgumi duplex-metrics \ - --input ${bam} \ - --output ${prefix}.duplex_metrics \ - ${args} - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}.fgumi" - """ - touch ${prefix}.duplex_metrics.txt - """ -} diff --git a/nextflow.config b/nextflow.config index 93cf3934..3d067c34 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,7 +32,6 @@ params { fgumi_sort_max_memory = '2G' fgumi_sort_memory_per_thread = true fgumi_snap_ignore_mismatched_pairs = true - fgumi_duplex_metrics_extra_args = '' // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 386430c1..1bbeed1e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -124,11 +124,6 @@ "type": "boolean", "default": true, "description": "Pass -I to SNAP to ignore mismatched read IDs in paired-end input." - }, - "fgumi_duplex_metrics_extra_args": { - "type": "string", - "default": "", - "description": "Additional fgumi duplex-metrics options." 
} } }, diff --git a/subworkflows/local/fastq_to_aligned_cram/main.nf b/subworkflows/local/fastq_to_aligned_cram/main.nf index c4d353c9..024256f8 100644 --- a/subworkflows/local/fastq_to_aligned_cram/main.nf +++ b/subworkflows/local/fastq_to_aligned_cram/main.nf @@ -124,7 +124,6 @@ workflow FASTQ_TO_CRAM { ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.family_size_histogram) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.consensus_metrics) ch_sormadup_metrics = ch_sormadup_metrics.mix(UMI_CONSENSUS_FGUMI.out.filtering_metrics) - ch_duplex_metrics = UMI_CONSENSUS_FGUMI.out.duplex_metrics ch_family_size_histogram = UMI_CONSENSUS_FGUMI.out.family_size_histogram ch_filtered_consensus_bam = UMI_CONSENSUS_FGUMI.out.filtered_consensus_bam @@ -183,6 +182,5 @@ workflow FASTQ_TO_CRAM { rna_junctions = FASTQ_ALIGN_RNA.out.junctions sormadup_metrics = ch_sormadup_metrics family_size_histogram = ch_family_size_histogram - duplex_metrics = ch_duplex_metrics align_reports = FASTQ_ALIGN_DNA.out.reports } diff --git a/subworkflows/local/umi_consensus/main.nf b/subworkflows/local/umi_consensus/main.nf index 4cf602ab..476df3e2 100644 --- a/subworkflows/local/umi_consensus/main.nf +++ b/subworkflows/local/umi_consensus/main.nf @@ -1,7 +1,6 @@ #!/usr/bin/env nextflow // MODULES -include { FGUMI_DUPLEX_METRICS } from "../../../modules/local/fgumi/duplexmetrics/main.nf" include { FGUMI_EXTRACT } from "../../../modules/local/fgumi/extract/main.nf" include { FGUMI_FILTER } from "../../../modules/local/fgumi/filter/main.nf" include { FGUMI_GROUP } from "../../../modules/local/fgumi/group/main.nf" @@ -43,10 +42,6 @@ workflow UMI_CONSENSUS_FGUMI { FGUMI_GROUP.out.bam ) - FGUMI_DUPLEX_METRICS( - FGUMI_GROUP.out.bam - ) - // Step 7: filter consensus reads, then coordinate-sort/index for downstream CRAM conversion. 
FGUMI_FILTER( FGUMI_SIMPLEX.out.bam @@ -67,6 +62,5 @@ workflow UMI_CONSENSUS_FGUMI { family_size_histogram = FGUMI_GROUP.out.family_size_histogram consensus_metrics = FGUMI_SIMPLEX.out.consensus_metrics filtering_metrics = FGUMI_FILTER.out.filtering_metrics - duplex_metrics = FGUMI_DUPLEX_METRICS.out.duplex_metrics filtered_consensus_bam = FGUMI_SORT.out.bam } diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test b/tests/modules/local/fgumi/duplexmetrics/main.nf.test deleted file mode 100644 index 2266eaa9..00000000 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process FGUMI_DUPLEX_METRICS" - script "modules/local/fgumi/duplexmetrics/main.nf" - process "FGUMI_DUPLEX_METRICS" - - tag "modules" - tag "modules/local" - tag "modules/local/fgumi" - tag "modules/local/fgumi/duplexmetrics" - - test("test - stub") { - // Stub-mode contract test: verifies duplex-metrics output wiring. - options "-stub" - - when { - process { - """ - input[0] = [ - [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) - ] - """ - } - } - - then { - // Keep assertions minimal and snapshot-driven, consistent with other module tests. 
- assert process.success - assert snapshot(process.out).match() - } - } -} diff --git a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap b/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap deleted file mode 100644 index c2514d51..00000000 --- a/tests/modules/local/fgumi/duplexmetrics/main.nf.test.snap +++ /dev/null @@ -1,43 +0,0 @@ -{ - "test - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - "FGUMI_DUPLEX_METRICS", - "fgumi", - "0.1.2" - ] - ], - "duplex_metrics": [ - [ - { - "id": "test" - }, - "test.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions_fgumi": [ - [ - "FGUMI_DUPLEX_METRICS", - "fgumi", - "0.1.2" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-02T16:04:44.383973526" - } -} \ No newline at end of file diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 405fb1d7..2c553192 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -46,7 +46,6 @@ nextflow_workflow { then { assert workflow.success // Explicitly assert newly exposed UMI channels. 
- assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 assert snapshot( diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap index d699ff72..55035830 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test.snap @@ -25,27 +25,6 @@ "UMI_consensus1.cram.crai" ] ], - "duplex_metrics": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "family_size_histogram": [ [ { @@ -178,6 +157,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-08T10:29:40.567401171" + "timestamp": "2026-04-09T10:20:21.479979029" } } \ No newline at end of file diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test index a365bb28..0ca457d6 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -48,7 +48,6 @@ nextflow_workflow { assert workflow.out.family_size_histogram.size() == 1 assert workflow.out.consensus_metrics.size() == 1 assert workflow.out.filtering_metrics.size() == 1 - assert workflow.out.duplex_metrics.size() == 1 assert workflow.out.filtered_consensus_bam.size() == 1 assert snapshot(workflow.out).match() } diff --git 
a/tests/subworkflows/local/umi_consensus/main.nf.test.snap b/tests/subworkflows/local/umi_consensus/main.nf.test.snap index 9fc3df12..9dbc33d6 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test.snap +++ b/tests/subworkflows/local/umi_consensus/main.nf.test.snap @@ -109,27 +109,6 @@ ] ], "5": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "6": [ [ { "id": "UMI_consensus1", @@ -193,27 +172,6 @@ "UMI_consensus1.fgumi.simplex.consensus_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "duplex_metrics": [ - [ - { - "id": "UMI_consensus1", - "samplename": "HT1080_chr20", - "single_end": false, - "sample_type": "DNA", - "markdup": "bamsormadup", - "fgumi_aware": true, - "genome_data": { - "fasta": "ref.fa:md5,78724096432e1b2702881f0126c656f2", - "fai": "ref.fa.fai:md5,fdd09c27f9ecf4cc7e824fd8407b72b7", - "dict": "ref.dict:md5,759dea0d5194dec0cd4f3e02fadf6e83", - "snap": [ - "OverflowTable.txt:md5,178164f81917b8e87073295a635588de" - ] - } - }, - "UMI_consensus1.fgumi.duplex_metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], "family_size_histogram": [ [ { @@ -304,6 +262,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.4" }, - "timestamp": "2026-04-08T10:16:16.364166786" + "timestamp": "2026-04-09T10:19:55.500577314" } } \ No newline at end of file diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index 6427b9ab..d4a3c3d2 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -271,7 +271,6 @@ workflow PREPROCESSING { ) // Collect 
both standard and UMI-specific metrics for MultiQC. ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.sormadup_metrics) - ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.duplex_metrics) ch_multiqc_files = ch_multiqc_files.mix(FASTQ_TO_CRAM.out.family_size_histogram) /* @@ -436,7 +435,6 @@ workflow PREPROCESSING { // UMI-specific outputs exposed at workflow level. family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam - umi_duplex_metrics = FASTQ_TO_CRAM.out.duplex_metrics umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary From 1cd4b3b47a3b9ffe852287db8fdbdc988786855f Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 10:32:05 +0200 Subject: [PATCH 19/25] Removed output redundancy --- main.nf | 7 ------- tests/modules/local/fgumi/sort/main.nf.test | 1 + workflows/preprocessing.nf | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/main.nf b/main.nf index a5c6c7fa..c866d9e4 100644 --- a/main.nf +++ b/main.nf @@ -82,7 +82,6 @@ workflow { // Additional UMI consensus outputs. family_size_histogram = PREPROCESSING.out.family_size_histogram umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam - umi_crams = PREPROCESSING.out.umi_crams mosdepth_global = PREPROCESSING.out.mosdepth_global mosdepth_summary = PREPROCESSING.out.mosdepth_summary mosdepth_regions = PREPROCESSING.out.mosdepth_regions @@ -191,12 +190,6 @@ output { bam >> (meta.library ? "${meta.library}/${meta.samplename}/${bam.name}" : "${meta.samplename}/${bam.name}") } } - umi_crams { - path { meta, cram, crai -> - cram >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram" : "${meta.samplename}/${meta.samplename}.umi.cram") - crai >> (meta.library ? 
"${meta.library}/${meta.samplename}/${meta.samplename}.umi.cram.crai" : "${meta.samplename}/${meta.samplename}.umi.cram.crai") - } - } mosdepth_global { path { meta, _file -> return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/") diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test index 7879f414..045b4bbb 100644 --- a/tests/modules/local/fgumi/sort/main.nf.test +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -29,4 +29,5 @@ nextflow_process { assert snapshot(process.out).match() } } + } diff --git a/workflows/preprocessing.nf b/workflows/preprocessing.nf index d4a3c3d2..17f679ec 100644 --- a/workflows/preprocessing.nf +++ b/workflows/preprocessing.nf @@ -435,7 +435,6 @@ workflow PREPROCESSING { // UMI-specific outputs exposed at workflow level. family_size_histogram = FASTQ_TO_CRAM.out.family_size_histogram umi_filtered_consensus_bam = FASTQ_TO_CRAM.out.filtered_consensus_bam - umi_crams = FASTQ_TO_CRAM.out.cram_crai.filter { meta, _cram, _crai -> meta.fgumi_aware == true } mosdepth_global = COVERAGE.out.mosdepth_global mosdepth_summary = COVERAGE.out.mosdepth_summary mosdepth_regions = COVERAGE.out.mosdepth_regions From 249df563df922b445d66ecdff9da7f6c2f3b9a92 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 9 Apr 2026 11:43:21 +0200 Subject: [PATCH 20/25] Removed test BAMs + added paths to github repo test BAMs --- tests/inputs/fgumi/consensus.bam | 1 - tests/inputs/fgumi/template.bam | 1 - tests/inputs/fgumi/unmapped.bam | 1 - tests/modules/local/fgumi/extract/main.nf.test | 4 ++-- tests/modules/local/fgumi/filter/main.nf.test | 2 +- tests/modules/local/fgumi/group/main.nf.test | 2 +- tests/modules/local/fgumi/simplex/main.nf.test | 2 +- tests/modules/local/fgumi/snapzippersort/main.nf.test | 2 +- tests/modules/local/fgumi/sort/main.nf.test | 2 +- 9 files changed, 7 insertions(+), 10 deletions(-) delete mode 100644 tests/inputs/fgumi/consensus.bam delete mode 100644 
tests/inputs/fgumi/template.bam delete mode 100644 tests/inputs/fgumi/unmapped.bam diff --git a/tests/inputs/fgumi/consensus.bam b/tests/inputs/fgumi/consensus.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/consensus.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/inputs/fgumi/template.bam b/tests/inputs/fgumi/template.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/template.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/inputs/fgumi/unmapped.bam b/tests/inputs/fgumi/unmapped.bam deleted file mode 100644 index 48cdce85..00000000 --- a/tests/inputs/fgumi/unmapped.bam +++ /dev/null @@ -1 +0,0 @@ -placeholder diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test index ad1088a5..a92c75ce 100644 --- a/tests/modules/local/fgumi/extract/main.nf.test +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [id: "test", samplename: "test", library: "lib1"], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz", checkIfExists: true) ] ] """ diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index bec1f2b3..1c7f7c69 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true), + 
file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) ] """ diff --git a/tests/modules/local/fgumi/group/main.nf.test b/tests/modules/local/fgumi/group/main.nf.test index 8fb9b79e..cd54ae2f 100644 --- a/tests/modules/local/fgumi/group/main.nf.test +++ b/tests/modules/local/fgumi/group/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/template.bam", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/simplex/main.nf.test b/tests/modules/local/fgumi/simplex/main.nf.test index f30cbe76..9ff5c017 100644 --- a/tests/modules/local/fgumi/simplex/main.nf.test +++ b/tests/modules/local/fgumi/simplex/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/template.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/grouped.bam", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test index e27f388e..88ca0500 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test", samplename: "test"], - file("${projectDir}/tests/inputs/fgumi/unmapped.bam", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), 
file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) diff --git a/tests/modules/local/fgumi/sort/main.nf.test b/tests/modules/local/fgumi/sort/main.nf.test index 045b4bbb..380727d0 100644 --- a/tests/modules/local/fgumi/sort/main.nf.test +++ b/tests/modules/local/fgumi/sort/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: "test"], - file("${projectDir}/tests/inputs/fgumi/consensus.bam", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true) ] """ } From 56cf6cc46686014353b9338694d7352a30f09bc7 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 15:07:59 +0200 Subject: [PATCH 21/25] Updated filepaths in tests --- tests/inputs/fgumi/R1.fastq.gz | 4 ---- tests/inputs/fgumi/R2.fastq.gz | 4 ---- tests/inputs/fgumi/ref.dict | 2 -- tests/inputs/fgumi/ref.fa | 2 -- tests/inputs/fgumi/ref.fa.fai | 1 - tests/inputs/test.yml | 8 ++++---- tests/modules/local/fgumi/extract/main.nf.test | 4 ++-- tests/modules/local/fgumi/filter/main.nf.test | 2 +- tests/modules/local/fgumi/snapzippersort/main.nf.test | 4 ++-- 9 files changed, 9 insertions(+), 22 deletions(-) delete mode 100644 tests/inputs/fgumi/R1.fastq.gz delete mode 100644 tests/inputs/fgumi/R2.fastq.gz delete mode 100644 tests/inputs/fgumi/ref.dict delete mode 100644 tests/inputs/fgumi/ref.fa delete mode 100644 tests/inputs/fgumi/ref.fa.fai diff --git a/tests/inputs/fgumi/R1.fastq.gz b/tests/inputs/fgumi/R1.fastq.gz deleted file mode 100644 index 3b41ea25..00000000 --- a/tests/inputs/fgumi/R1.fastq.gz +++ /dev/null @@ -1,4 +0,0 @@ -@r1 -ACGT -+ -!!!! diff --git a/tests/inputs/fgumi/R2.fastq.gz b/tests/inputs/fgumi/R2.fastq.gz deleted file mode 100644 index c5b0a999..00000000 --- a/tests/inputs/fgumi/R2.fastq.gz +++ /dev/null @@ -1,4 +0,0 @@ -@r2 -TGCA -+ -!!!! 
diff --git a/tests/inputs/fgumi/ref.dict b/tests/inputs/fgumi/ref.dict deleted file mode 100644 index c7a324e0..00000000 --- a/tests/inputs/fgumi/ref.dict +++ /dev/null @@ -1,2 +0,0 @@ -@HD VN:1.6 SO:unsorted -@SQ SN:chr1 LN:12 diff --git a/tests/inputs/fgumi/ref.fa b/tests/inputs/fgumi/ref.fa deleted file mode 100644 index 41884804..00000000 --- a/tests/inputs/fgumi/ref.fa +++ /dev/null @@ -1,2 +0,0 @@ ->chr1 -ACGTACGTACGT diff --git a/tests/inputs/fgumi/ref.fa.fai b/tests/inputs/fgumi/ref.fa.fai deleted file mode 100644 index 795162b9..00000000 --- a/tests/inputs/fgumi/ref.fa.fai +++ /dev/null @@ -1 +0,0 @@ -chr1 12 6 12 13 diff --git a/tests/inputs/test.yml b/tests/inputs/test.yml index 051ac1ca..c09566ea 100644 --- a/tests/inputs/test.yml +++ b/tests/inputs/test.yml @@ -51,8 +51,8 @@ fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz # UMI consensus (fgumi) inputs # Example DNA sample with fgumi_aware enabled for fgumi processing. 
-- id: UMI_consensus1 - samplename: HT1080-chr20 +- id: sample1 + samplename: sample1-chr21 library: test_library organism: Homo sapiens tag: WES @@ -61,6 +61,6 @@ markdup: bamsormadup fgumi_aware: true run_coverage: true - fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz - fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz + fastq_1: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R1_001.fastq.gz + fastq_2: https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R2_001.fastq.gz diff --git a/tests/modules/local/fgumi/extract/main.nf.test b/tests/modules/local/fgumi/extract/main.nf.test index a92c75ce..40cf1af8 100644 --- a/tests/modules/local/fgumi/extract/main.nf.test +++ b/tests/modules/local/fgumi/extract/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { input[0] = [ [id: "test", samplename: "test", library: "lib1"], [ - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R1_001.fastq.gz", checkIfExists: true), - file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/HT1080-chr20_S16_R2_001.fastq.gz", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R1_001.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/sample1_S31_R2_001.fastq.gz", checkIfExists: true) ] ] """ diff --git a/tests/modules/local/fgumi/filter/main.nf.test b/tests/modules/local/fgumi/filter/main.nf.test index 1c7f7c69..51c6f8c8 100644 --- a/tests/modules/local/fgumi/filter/main.nf.test +++ b/tests/modules/local/fgumi/filter/main.nf.test @@ -19,7 +19,7 @@ 
nextflow_process { input[0] = [ [id: "test"], file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/consensus.bam", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true) ] """ } diff --git a/tests/modules/local/fgumi/snapzippersort/main.nf.test b/tests/modules/local/fgumi/snapzippersort/main.nf.test index 88ca0500..c3a34cbe 100644 --- a/tests/modules/local/fgumi/snapzippersort/main.nf.test +++ b/tests/modules/local/fgumi/snapzippersort/main.nf.test @@ -20,8 +20,8 @@ nextflow_process { [id: "test", samplename: "test"], file("https://github.com/nf-cmgg/test-datasets/raw/main/data/genomics/homo_sapiens/illumina/bam/unmapped.bam", checkIfExists: true), file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.dict", checkIfExists: true) + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", checkIfExists: true) ] """ } From 092a3820c29c690c362d587b129402fb9979682a Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:24:48 +0200 Subject: [PATCH 22/25] removed thread and mem args from config --- conf/modules.config | 3 --- modules/local/fgumi/extract/main.nf | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 88b565c2..03f7b4d1 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -242,9 +242,6 @@ process { "--library \"${meta.library ?: meta.id}\"", "--read-structures ${meta.fgumi_read_structures ?: '+T +T'}", ((meta.fgumi_extract_umis_from_read_names != null ? meta.fgumi_extract_umis_from_read_names : true) ? "--extract-umis-from-read-names" : ""), - "--threads ${task.cpus}", - "--queue-memory ${params.fgumi_queue_memory}", - params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : "", "--compression-level ${params.fgumi_compression_level}", ].join(" ").trim() } diff --git a/modules/local/fgumi/extract/main.nf b/modules/local/fgumi/extract/main.nf index ffabc4b4..89002f13 100644 --- a/modules/local/fgumi/extract/main.nf +++ b/modules/local/fgumi/extract/main.nf @@ -18,12 +18,17 @@ process FGUMI_EXTRACT { script: def args = task.ext.args ?: '' + // Derive per-thread queue memory from requested process resources. + def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue() prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped" """ fgumi extract \ --inputs ${reads} \ --output ${prefix}.bam \ + --threads ${task.cpus} \ + --queue-memory ${queue_memory_mb} \ + --queue-memory-per-thread \ ${args} """ From c1d00d54c1ce1ce9ac08a2916129559b4022a553 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:28:27 +0200 Subject: [PATCH 23/25] params to meta --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 03f7b4d1..257e42b9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -297,9 +297,9 @@ process { ext.prefix = { "${meta.id}.fgumi.group" } ext.args = { [ - "--strategy ${params.fgumi_group_strategy}", - "--edits ${params.fgumi_group_edits}", - "--compression-level ${params.fgumi_compression_level}", + "--strategy ${meta.fgumi_group_strategy ?: 'adjacency'}", + "--edits ${meta.fgumi_group_edits != null ? 
meta.fgumi_group_edits : 1}", + "--compression-level ${meta.fgumi_compression_level != null ? meta.fgumi_compression_level : 1}", "--grouping-metrics ${meta.id}.fgumi.group.grouping_metrics.txt", "--family-size-histogram ${meta.id}.fgumi.group.family_size_histogram.txt", ].join(" ").trim() From b6091f0fd9e44cec55242ff0652a231531284343 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Fri, 10 Apr 2026 17:35:38 +0200 Subject: [PATCH 24/25] Fixed paths in test --- .../fastq_to_aligned_cram/fgumi_umi_stub.nf.test | 16 ++++++++-------- .../local/umi_consensus/main.nf.test | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test index 2c553192..c03f41e3 100644 --- a/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test +++ b/tests/subworkflows/local/fastq_to_aligned_cram/fgumi_umi_stub.nf.test @@ -24,19 +24,19 @@ nextflow_workflow { markdup: "bamsormadup", fgumi_aware: true, genome_data: [ - fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", - fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", - dict: "${projectDir}/tests/inputs/fgumi/ref.dict", - snap: "${projectDir}/tests/inputs/fgumi/snap_index" + fasta: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + fai: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + dict: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + snap: "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" ] ], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + 
file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R2.fastq.gz", checkIfExists: true) ], "snap", - file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true), + file("s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/", checkIfExists: true), + file("s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true), [] ]) """ diff --git a/tests/subworkflows/local/umi_consensus/main.nf.test b/tests/subworkflows/local/umi_consensus/main.nf.test index 0ca457d6..7135f5db 100644 --- a/tests/subworkflows/local/umi_consensus/main.nf.test +++ b/tests/subworkflows/local/umi_consensus/main.nf.test @@ -23,19 +23,19 @@ nextflow_workflow { markdup: "bamsormadup", fgumi_aware: true, genome_data: [ - fasta: "${projectDir}/tests/inputs/fgumi/ref.fa", - fai: "${projectDir}/tests/inputs/fgumi/ref.fa.fai", - dict: "${projectDir}/tests/inputs/fgumi/ref.dict", - snap: "${projectDir}/tests/inputs/fgumi/snap_index" + fasta: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", + fai: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna.fai", + dict: "s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.dict", + snap: "s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/" ] ], [ - file("${projectDir}/tests/inputs/fgumi/R1.fastq.gz", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/R2.fastq.gz", checkIfExists: true) + 
file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R1.fastq.gz", checkIfExists: true), + file("https://github.com/nf-cmgg/test-datasets/raw/preprocessing/data/genomics/homo_sapiens/illumina/fastq/sample1_R2.fastq.gz", checkIfExists: true) ], "snap", - file("${projectDir}/tests/inputs/fgumi/snap_index", checkIfExists: true), - file("${projectDir}/tests/inputs/fgumi/ref.fa", checkIfExists: true) + file("s3://reference-data/genomes/Hsapiens/GRCh38/snapaligner/", checkIfExists: true), + file("s3://test-data/genomics/homo_sapiens/genome/seq/GCA_000001405.15_GRCh38_full_plus_hs38d1_analysis_set_chr21.fna", checkIfExists: true) ]) """ } From fa69f2f81ad38eba6b7975c2caa3729d074036d1 Mon Sep 17 00:00:00 2001 From: freerkvandijk Date: Thu, 16 Apr 2026 09:22:50 +0200 Subject: [PATCH 25/25] Updated fgumi to version 0.1.3 --- modules/local/fgumi/snapzippersort/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/fgumi/snapzippersort/main.nf b/modules/local/fgumi/snapzippersort/main.nf index 0c6ec63b..0af50978 100644 --- a/modules/local/fgumi/snapzippersort/main.nf +++ b/modules/local/fgumi/snapzippersort/main.nf @@ -3,8 +3,8 @@ process FGUMI_SNAP_ZIPPER_SORT { label 'process_high' container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/2466270633749543330f352e34588f142de4988585ce63e7f22ee5ed1ff57450/data' - : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:c9ba911435350668'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/26/260799863489814407983695270f20538a7c28a25c1a14f4477c44e9955743b1/data' + : 'community.wave.seqera.io/library/fgumi_samtools_snap-aligner:fe040922c66ac98d'}" input: tuple val(meta), path(unmapped_bam), path(index, stageAs: "index/*"), path(fasta), path(dict)