Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e3021ca
Merge pull request #132 from nf-cmgg/dev
matthdsm Jun 23, 2025
253c025
Preprocessing v3.0.0 release
matthdsm Mar 6, 2026
f5a2972
Initial commit fgumi implementation
freerkvandijk Apr 7, 2026
bd23c30
Fixed assets. Changed to fgumi_aware parameter.
freerkvandijk Apr 8, 2026
1795ab6
Changed pipeline wide read-structures to meta values
freerkvandijk Apr 8, 2026
9ea17ff
Added usage of default SNAP parameters in config
freerkvandijk Apr 8, 2026
f205480
Process memory now handled per nf-core style
freerkvandijk Apr 8, 2026
b2c80a7
Changes to fgumi extract to make it portable (nf-core)
freerkvandijk Apr 8, 2026
ac9c547
List input instead of own conversion
freerkvandijk Apr 8, 2026
05ee645
Split FGUMI_FILTER into separate filter and sort module and updated t…
freerkvandijk Apr 8, 2026
cfdaac0
decoupled umi_aware and fgumi_aware
freerkvandijk Apr 8, 2026
31f924d
Refactor: fgumi modules to dedicated umi_consensus subworkflow
freerkvandijk Apr 8, 2026
3b30b71
removed output channel
freerkvandijk Apr 8, 2026
289d935
Changed tests to use template.bam and removed grouped.bam
freerkvandijk Apr 8, 2026
53de497
Changed some default resource requirements. Needs benchmarking though
freerkvandijk Apr 8, 2026
53549c2
Added some comments + fixed fgumi simplex parameter bug
freerkvandijk Apr 8, 2026
8af63a6
Dropped params
freerkvandijk Apr 9, 2026
0240c8b
Fixed ext.args
freerkvandijk Apr 9, 2026
fde7ed8
Moved hard coded options
freerkvandijk Apr 9, 2026
a6691fb
Removed duplex metrics from pipeline
freerkvandijk Apr 9, 2026
1cd4b3b
Removed output redundancy
freerkvandijk Apr 9, 2026
249df56
Removed test BAMs + added paths to github repo test BAMs
freerkvandijk Apr 9, 2026
56cf6cc
Updated filepaths in tests
freerkvandijk Apr 10, 2026
092a382
removed thread and mem args from config
freerkvandijk Apr 10, 2026
c1d00d5
params to meta
freerkvandijk Apr 10, 2026
b6091f0
Fixed paths in test
freerkvandijk Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@
"description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't change this, but add another option

"default": false
},
"fgumi_aware": {
"meta": ["fgumi_aware"],
"type": "boolean",
"description": "Enable UMI-aware consensus processing through the fgumi branch.",
"default": false
},
"fgumi_read_structures": {
"meta": ["fgumi_read_structures"],
"type": "string",
"description": "Read structures passed to fgumi extract for this sample.",
"default": null
},
"fgumi_extract_umis_from_read_names": {
"meta": ["fgumi_extract_umis_from_read_names"],
"type": "boolean",
"description": "Override fgumi extraction from read names for this sample.",
"default": null
},
"skip_trimming": {
"meta": ["skip_trimming"],
"type": "boolean",
Expand Down
18 changes: 18 additions & 0 deletions assets/schema_sampleinfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@
"description": "Run markdup in UMI-aware mode. This applies to Samtools only and requires the UMI to be in the read name.",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't change this, but add another option

"default": false
},
"fgumi_aware": {
"meta": ["fgumi_aware"],
"type": "boolean",
"description": "Enable UMI-aware consensus processing through the fgumi branch.",
"default": false
},
"fgumi_read_structures": {
"meta": ["fgumi_read_structures"],
"type": "string",
"description": "Read structures passed to fgumi extract for this sample.",
"default": null
},
"fgumi_extract_umis_from_read_names": {
"meta": ["fgumi_extract_umis_from_read_names"],
"type": "boolean",
"description": "Override fgumi extraction from read names for this sample.",
"default": null
},
"skip_trimming": {
"meta": ["skip_trimming"],
"type": "boolean",
Expand Down
121 changes: 121 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,127 @@ process {
}
}

//// FGUMI extract (step 1)
// Resources and command-line options for every process whose name matches '.*FGUMI_EXTRACT'.
withName: '.*FGUMI_EXTRACT' {
    cpus       = 4
    memory     = 16.GB
    ext.prefix = { "${meta.id}.fgumi.unmapped" }
    ext.args   = {
        // The library name falls back to the sample id; read structures fall back to '+T +T'.
        def library_name    = meta.library ?: meta.id
        def read_structures = meta.fgumi_read_structures ?: '+T +T'
        // An unset (null) per-sample value counts as enabled; only a falsy non-null value disables the flag.
        def umis_in_names   = meta.fgumi_extract_umis_from_read_names == null || meta.fgumi_extract_umis_from_read_names
        def cli_options     = [
            "--sample \"${meta.id}\"",
            "--library \"${library_name}\"",
            "--read-structures ${read_structures}",
            umis_in_names ? "--extract-umis-from-read-names" : "",
            "--compression-level ${params.fgumi_compression_level}",
        ]
        // An empty entry stays in the list, so a disabled flag leaves a double space in the joined string.
        return cli_options.join(" ").trim()
    }
}

//// FGUMI fastq | SNAP | zipper | template sort (step 3)
// Settings for every process whose name matches the regex '.*FGUMI_SNAP_ZIPPER_SORT':
// 16 CPUs, 64 GB of memory, an output prefix of "<meta.id>.fgumi" and five option strings
// (ext.args .. ext.args5). Which command of the pipe receives which string is decided by
// the module itself, which is not visible here — TODO confirm the mapping against the module.
withName: '.*FGUMI_SNAP_ZIPPER_SORT' {
cpus = 16
memory = 64.GB
ext.prefix = { "${meta.id}.fgumi" }
// ext.args: presumably the aligner (SNAP) options, judging by the review note below — verify.
// "-I" is only emitted when params.fgumi_snap_ignore_mismatched_pairs is truthy.
// The read-group option is only emitted when meta.readgroup is set; it is assembled from the
// map entries whose value is non-blank, as KEY:VALUE pairs separated by a literal backslash-t.
// Disabled entries are empty strings, so the joined result can contain double spaces;
// trim() only strips the ends.
ext.args = {
[
"-b-",
"-sm 20",
params.fgumi_snap_ignore_mismatched_pairs ? "-I" : "",
"-hc-",
"-S id",
"-sa",
"-xf 2",
meta.readgroup ? "-R \"@RG\\t" + meta.readgroup.findResults { rg -> rg.value?.trim() ? "${rg.key}:${rg.value}" : null }.join("\\t") + "\"" : "",
].join(" ").trim()
}
// ext.args2: only forwards the task's CPU count as a thread count.
ext.args2 = {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Snap params should be equivalent to default alignment config

[
"--threads ${task.cpus}",
].join(" ").trim()
}
// ext.args3 and ext.args4 render the same string: thread count, the sort memory cap from
// params.fgumi_sort_max_memory, and an explicit true/false for --memory-per-thread.
ext.args3 = {
[
"--threads ${task.cpus}",
"--max-memory ${params.fgumi_sort_max_memory}",
"--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}",
].join(" ").trim()
}
ext.args4 = {
[
"--threads ${task.cpus}",
"--max-memory ${params.fgumi_sort_max_memory}",
"--memory-per-thread=${params.fgumi_sort_memory_per_thread ? 'true' : 'false'}",
].join(" ").trim()
}
// ext.args5: thread count in "-@ N" form.
ext.args5 = {
[
"-@ ${task.cpus}",
].join(" ").trim()
}
}

//// FGUMI group (step 4)
// Resources and command-line options for every process whose name matches '.*FGUMI_GROUP'.
withName: '.*FGUMI_GROUP' {
    cpus       = 8
    memory     = 32.GB
    ext.prefix = { "${meta.id}.fgumi.group" }
    ext.args   = {
        // Per-sample overrides come from meta; null checks (rather than ?:) keep an explicit 0.
        def strategy     = meta.fgumi_group_strategy ?: 'adjacency'
        def max_edits    = meta.fgumi_group_edits == null ? 1 : meta.fgumi_group_edits
        def compression  = meta.fgumi_compression_level == null ? 1 : meta.fgumi_compression_level
        // Both metric files share the same stem as ext.prefix above.
        def metrics_stem = "${meta.id}.fgumi.group"
        def cli_options  = [
            "--strategy ${strategy}",
            "--edits ${max_edits}",
            "--compression-level ${compression}",
            "--grouping-metrics ${metrics_stem}.grouping_metrics.txt",
            "--family-size-histogram ${metrics_stem}.family_size_histogram.txt",
        ]
        return cli_options.join(" ").trim()
    }
}

//// FGUMI simplex (step 5)
// Resources and command-line options for every process whose name matches '.*FGUMI_SIMPLEX'.
withName: '.*FGUMI_SIMPLEX' {
    cpus       = 8
    memory     = 32.GB
    ext.prefix = { "${meta.id}.fgumi.simplex" }
    ext.args   = {
        // The per-thread switch is a bare flag, emitted only when the pipeline parameter is truthy.
        def per_thread_flag = params.fgumi_queue_memory_per_thread ? "--queue-memory-per-thread" : ""
        // The stats file shares the same stem as ext.prefix above.
        def stats_file      = "${meta.id}.fgumi.simplex.consensus_metrics.txt"
        def cli_options     = [
            "--min-reads ${params.fgumi_simplex_min_reads}",
            "--threads ${task.cpus}",
            "--queue-memory ${params.fgumi_queue_memory}",
            per_thread_flag,
            "--compression-level ${params.fgumi_compression_level}",
            "--stats ${stats_file}",
        ]
        // An empty flag entry stays in the list, so it leaves a double space in the joined string.
        return cli_options.join(" ").trim()
    }
}

//// FGUMI filter (step 7)
// Resources and command-line options for every process whose name matches '.*FGUMI_FILTER'.
withName: '.*FGUMI_FILTER' {
    cpus       = 4
    memory     = 16.GB
    ext.prefix = { "${meta.id}.fgumi.filter" }
    ext.args   = {
        // The stats file shares the same stem as ext.prefix above.
        def stats_file  = "${meta.id}.fgumi.filter.filtering_metrics.txt"
        def cli_options = [
            "--min-reads 1,1,1",
            "--stats ${stats_file}",
        ]
        return cli_options.join(" ").trim()
    }
}

//// FGUMI coordinate sort/index after filter (step 7)
// Resources and command-line options for every process whose name matches '.*FGUMI_SORT'.
withName: '.*FGUMI_SORT' {
    cpus       = 8
    memory     = 32.GB
    ext.prefix = { "${meta.id}.fgumi.filter" }
    ext.args   = {
        // The per-thread switch is always written out explicitly as true or false.
        def per_thread  = params.fgumi_sort_memory_per_thread ? 'true' : 'false'
        def cli_options = [
            "--order coordinate",
            "--write-index",
            "--threads ${task.cpus}",
            "--max-memory ${params.fgumi_sort_max_memory}",
            "--memory-per-thread=${per_thread}",
        ]
        return cli_options.join(" ").trim()
    }
}

// coverage
//// Mosdepth
withName: '.*COVERAGE:MOSDEPTH' {
Expand Down
6 changes: 6 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ A `fastq` samplesheet file consisting of paired-end data may look something like
aligner: bwamem
markdup: bamsormadup
umi_aware: false
fgumi_aware: false
skip_trimming: false
trim_front: 0
trim_tail: 0
Expand Down Expand Up @@ -67,6 +68,10 @@ Following table shows the fields that are used by the `fastq` samplesheet:

An [example samplesheet](../tests/inputs/test.yml) has been provided with the pipeline.

> [!NOTE]
> `umi_aware` and `fgumi_aware` are independent options.
> Use `umi_aware` for samtools markdup UMI mode, and `fgumi_aware` to run the fgumi consensus branch.

### Flowcell samplesheet

A `flowcell` samplesheet file consisting of one sequencing run may look something like the one below.
Expand Down Expand Up @@ -102,6 +107,7 @@ A `flowcell` sample info JSON/YML file consisting for one sequencing run may loo
aligner: bwamem
markdup: bamsormadup
umi_aware: false
fgumi_aware: false
skip_trimming: false
trim_front: 0
trim_tail: 0
Expand Down
14 changes: 14 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ workflow {
rna_junctions = PREPROCESSING.out.rna_junctions
align_reports = PREPROCESSING.out.align_reports
sormadup_metrics = PREPROCESSING.out.sormadup_metrics
// Additional UMI consensus outputs.
family_size_histogram = PREPROCESSING.out.family_size_histogram
umi_filtered_consensus_bam = PREPROCESSING.out.umi_filtered_consensus_bam
mosdepth_global = PREPROCESSING.out.mosdepth_global
mosdepth_summary = PREPROCESSING.out.mosdepth_summary
mosdepth_regions = PREPROCESSING.out.mosdepth_regions
Expand Down Expand Up @@ -176,6 +179,17 @@ output {
metrics >> (meta.library ? "${meta.library}/${meta.samplename}/${meta.samplename}.duplicate_metrics.txt" : "${meta.samplename}/${meta.samplename}.duplicate_metrics.txt")
}
}
// UMI consensus artefacts are published per sample next to CRAM outputs.
// Target directory is "<library>/<samplename>/" when the sample has a library, else "<samplename>/";
// the histogram keeps its own file name.
family_size_histogram {
    path { meta, hist_file ->
        def sample_dir = meta.library ? "${meta.library}/${meta.samplename}" : "${meta.samplename}"
        hist_file >> "${sample_dir}/${hist_file.name}"
    }
}
// Target directory is "<library>/<samplename>/" when the sample has a library, else "<samplename>/";
// the BAM keeps its own file name.
umi_filtered_consensus_bam {
    path { meta, bam_file ->
        def sample_dir = meta.library ? "${meta.library}/${meta.samplename}" : "${meta.samplename}"
        bam_file >> "${sample_dir}/${bam_file.name}"
    }
}
mosdepth_global {
path { meta, _file ->
return (meta.library ? "${meta.library}/${meta.samplename}/" : "${meta.samplename}/")
Expand Down
40 changes: 40 additions & 0 deletions modules/local/fgumi/extract/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Runs `fgumi extract` on the staged read files and writes a single BAM named "<prefix>.bam".
//
// Input:  tuple of the sample meta map and the read file(s).
// Output: bam            - [ meta, "<prefix>.bam" ]
//         versions_fgumi - process name, tool name and the version string reported by
//                          `fgumi --version`, sent to the 'versions' topic.
// Extra command-line options come from task.ext.args; the prefix from task.ext.prefix.
process FGUMI_EXTRACT {
    tag "$meta.id"
    label 'process_medium'

    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data'
        : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}"

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("${prefix}.bam"), emit: bam
    tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // prefix is left without `def` so the output declarations above can read it.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped"

    // Derive per-thread queue memory (75% of the per-CPU share, in MB) from the requested
    // process resources. task.memory is null when no memory directive is configured; in that
    // case the queue-memory flags are omitted instead of failing on a null dereference.
    def queue_memory_args = ''
    if (!task.memory) {
        log.info '[fgumi extract] Available memory not known - not setting --queue-memory. Specify process memory requirements to change this.'
    } else {
        def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue()
        queue_memory_args = "--queue-memory ${queue_memory_mb} --queue-memory-per-thread"
    }

    """
    fgumi extract \
        --inputs ${reads} \
        --output ${prefix}.bam \
        --threads ${task.cpus} \
        ${queue_memory_args} \
        ${args}
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.unmapped"
    """
    touch ${prefix}.bam
    """
}
38 changes: 38 additions & 0 deletions modules/local/fgumi/filter/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Runs `fgumi filter` on one BAM against a reference FASTA and writes "<prefix>.filtered.bam".
//
// Input:  tuple of the sample meta map, the BAM to filter and the reference FASTA.
// Output: bam               - [ meta, "<prefix>.filtered.bam" ]
//         filtering_metrics - [ meta, "<prefix>.filtering_metrics.txt" ], optional; the script
//                             itself does not name this file, so it only appears when the
//                             options in task.ext.args make the tool write it.
//         versions_fgumi    - process name, tool name and the version string reported by
//                             `fgumi --version`, sent to the 'versions' topic.
process FGUMI_FILTER {
    tag "${meta.id}"
    label 'process_medium'

    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data'
        : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}"

    input:
    tuple val(meta), path(bam), path(fasta)

    output:
    tuple val(meta), path("${prefix}.filtered.bam"), emit: bam
    tuple val(meta), path("${prefix}.filtering_metrics.txt"), optional: true, emit: filtering_metrics
    tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi

    when:
    task.ext.when == null || task.ext.when

    script:
    // prefix is left without `def` so the output declarations above can read it.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter"
    def extra_args = task.ext.args ?: ''

    """
    fgumi filter \
        --input ${bam} \
        --output ${prefix}.filtered.bam \
        --ref ${fasta} \
        ${extra_args}
    """

    stub:
    // The stub creates empty placeholders for both declared output files.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.filter"
    """
    touch ${prefix}.filtered.bam
    touch ${prefix}.filtering_metrics.txt
    """
}
44 changes: 44 additions & 0 deletions modules/local/fgumi/group/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Runs `fgumi group` on one BAM and writes "<prefix>.bam".
//
// Input:  tuple of the sample meta map and the BAM to group.
// Output: bam                   - [ meta, "<prefix>.bam" ]
//         grouping_metrics      - [ meta, "<prefix>.grouping_metrics.txt" ], optional
//         family_size_histogram - [ meta, "<prefix>.family_size_histogram.txt" ], optional
//         versions_fgumi        - process name, tool name and the version string reported by
//                                 `fgumi --version`, sent to the 'versions' topic.
// The script itself does not name the two metric files; they only appear when the options
// in task.ext.args make the tool write them under those names.
process FGUMI_GROUP {
    tag "$meta.id"
    label 'process_medium'

    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data'
        : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("${prefix}.bam"), emit: bam
    tuple val(meta), path("${prefix}.grouping_metrics.txt"), optional: true, emit: grouping_metrics
    tuple val(meta), path("${prefix}.family_size_histogram.txt"), optional: true, emit: family_size_histogram
    tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // prefix is left without `def` so the output declarations above can read it.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.group"

    // Derive per-thread queue memory (75% of the per-CPU share, in MB) from the requested
    // process resources. task.memory is null when no memory directive is configured; in that
    // case the queue-memory flags are omitted instead of failing on a null dereference.
    def queue_memory_args = ''
    if (!task.memory) {
        log.info '[fgumi group] Available memory not known - not setting --queue-memory. Specify process memory requirements to change this.'
    } else {
        def queue_memory_mb = (task.memory.mega / task.cpus * 0.75).intValue()
        queue_memory_args = "--queue-memory ${queue_memory_mb} --queue-memory-per-thread"
    }

    """
    fgumi group \
        --input ${bam} \
        --output ${prefix}.bam \
        --threads ${task.cpus} \
        ${queue_memory_args} \
        ${args}
    """

    stub:
    // The stub creates empty placeholders for every declared output file.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.group"
    """
    touch ${prefix}.bam
    touch ${prefix}.grouping_metrics.txt
    touch ${prefix}.family_size_histogram.txt
    """
}
37 changes: 37 additions & 0 deletions modules/local/fgumi/simplex/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Runs `fgumi simplex` on one BAM and writes "<prefix>.bam".
//
// Input:  tuple of the sample meta map and the input BAM.
// Output: bam               - [ meta, "<prefix>.bam" ]
//         consensus_metrics - [ meta, "<prefix>.consensus_metrics.txt" ], optional; the script
//                             itself does not name this file, so it only appears when the
//                             options in task.ext.args make the tool write it.
//         versions_fgumi    - process name, tool name and the version string reported by
//                             `fgumi --version`, sent to the 'versions' topic.
// Apart from input and output, every option (threads and memory included) comes from task.ext.args.
process FGUMI_SIMPLEX {
    tag "${meta.id}"
    label 'process_medium'

    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/95/954170443a820787c9e02ef2135ebb8ec29c6b03633b0d61b5fafa98c59a1cce/data'
        : 'community.wave.seqera.io/library/fgumi_r-base_r-ggplot2_r-scales:09c99070b82c1c28'}"

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("${prefix}.bam"), emit: bam
    tuple val(meta), path("${prefix}.consensus_metrics.txt"), optional: true, emit: consensus_metrics
    tuple val("${task.process}"), val('fgumi'), eval("fgumi --version | sed 's/^fgumi //;q'"), topic: versions, emit: versions_fgumi

    when:
    task.ext.when == null || task.ext.when

    script:
    // prefix is left without `def` so the output declarations above can read it.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex"
    def extra_args = task.ext.args ?: ''

    """
    fgumi simplex \
        --input ${bam} \
        --output ${prefix}.bam \
        ${extra_args}
    """

    stub:
    // The stub creates empty placeholders for both declared output files.
    prefix = task.ext.prefix ?: "${meta.id}.fgumi.simplex"
    """
    touch ${prefix}.bam
    touch ${prefix}.consensus_metrics.txt
    """
}
Loading
Loading