diff --git a/.nf-core.yml b/.nf-core.yml index 0f557db5..061ecb02 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,29 +1,29 @@ repository_type: pipeline lint: files_exist: - - CODE_OF_CONDUCT.md - - assets/nf-core-sash_logo_light.png - - docs/images/nf-core-sash_logo_light.png - - docs/images/nf-core-sash_logo_dark.png - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml - - .github/ISSUE_TEMPLATE/bug_report.yml - - .github/workflows/branch.yml - - .github/workflows/ci.yml - - .github/workflows/linting_comment.yml - - .github/workflows/linting.yml - - conf/igenomes.config + - CODE_OF_CONDUCT.md + - assets/nf-core-sash_logo_light.png + - docs/images/nf-core-sash_logo_light.png + - docs/images/nf-core-sash_logo_dark.png + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/awstest.yml + - .github/workflows/awsfulltest.yml + - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/workflows/branch.yml + - .github/workflows/ci.yml + - .github/workflows/linting_comment.yml + - .github/workflows/linting.yml + - conf/igenomes.config nextflow_config: - - manifest.name - - manifest.homePage - - process.cpus - - process.memory - - process.time - - custom_config + - manifest.name + - manifest.homePage + - process.cpus + - process.memory + - process.time + - custom_config multiqc_config: - - report_comment + - report_comment files_unchanged: - - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/ISSUE_TEMPLATE/bug_report.yml readme: - - nextflow_badge + - nextflow_badge diff --git a/CHANGELOG.md b/CHANGELOG.md index 21dc74a4..7f47fe17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -113,9 +113,9 @@ Initial release of umccr/sash, created with the [nf-core](https://nf-co.re/) tem ### Dependencies -| Tool | Old | New | -|------|-----|-----| -| Linx | 1.25 | 2.0 | -| Purple | 4.0.1 | 4.1.0 | -| Bolt | — | umccr/bolt#6 | -| GPGR | — | umccr/gpgr#88 | +| Tool | Old | New | +| ------ | ----- | ------------- | +| Linx | 1.25 | 
2.0 | +| Purple | 4.0.1 | 4.1.0 | +| Bolt | — | umccr/bolt#6 | +| GPGR | — | umccr/gpgr#88 | diff --git a/README.md b/README.md index 6c534c11..bee0ac9b 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ functionality. ## Table of contents -* [Summary](#summary) -* [Requirements](#requirements) -* [Usage](#usage) +- [Summary](#summary) +- [Requirements](#requirements) +- [Usage](#usage) ## Summary diff --git a/conf/modules.config b/conf/modules.config index 0dc483aa..730e5af5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ process { if (fp.equals('versions.yml')) { return null - } else if (fp.matches('output/.*.pcgr_acmg.grch38.html')) { + } else if (fp.matches('output/.*.pcgr.grch38.html')) { return "${meta.key}/${meta.tumor_id}.pcgr.html" } else { def fp_out = fp.replaceFirst(/output\//, '') @@ -126,7 +126,30 @@ process { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" } + ] + } + + + withName: 'SIGRAP_HRDETECT' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sigrap/hrdetect/${filename}" } + ] + } + + withName: 'SIGRAP_MUTPAT' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.equals('versions.yml')) { + return null + } else { + return "${meta.key}/sigrap/${filename}" + } + } ] } @@ -212,4 +235,12 @@ process { ] } + withName: 'VCF2MAF' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/vcf2maf/${filename}" }, + ] + } + } diff --git a/conf/refdata.config b/conf/refdata.config index 6bef3738..f57e5cf2 100644 --- a/conf/refdata.config +++ b/conf/refdata.config @@ -4,7 +4,7 @@ params { umccr_reference_data = '2--0' hmf_reference_data = 'hmf_pipeline_resources.38_v2.2.0--3' - pcgr = '20220203' + pcgr = '20250314' snpeff = '5_1' oncokb = '4.12' @@ -20,7 +20,8 @@ params { } miscdata_paths { - pcgr_dir = "databases/pcgr/v${params.data_versions.pcgr}/" + pcgr_dir = "databases/pcgr/pcgr_ref_data.${params.data_versions.pcgr}.grch38.tgz" + vep_dir = "databases/pcgr/homo_sapiens_vep_113_GRCh38.tar.gz" snpeff_dir = "databases/snpeff/v${params.data_versions.snpeff}/" oncokb_genes = "databases/oncokb/v${params.data_versions.oncokb}/cancerGeneList.txt" diff --git a/modules/local/bolt/other/cancer_report/main.nf b/modules/local/bolt/other/cancer_report/main.nf index 19469ca1..dff58fae 100644 --- a/modules/local/bolt/other/cancer_report/main.nf +++ b/modules/local/bolt/other/cancer_report/main.nf @@ -2,10 +2,10 @@ process BOLT_OTHER_CANCER_REPORT { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-gpgr' + container 'docker.io/qclayssen/bolt:0.3.0-dev-14-gpgr' input: - tuple val(meta), path(smlv_somatic_vcf), path(smlv_somatic_bcftools_stats), path(smlv_somatic_counts_process), path(sv_somatic_tsv), path(sv_somatic_vcf), path(cnv_somatic_tsv), path(af_global), path(af_keygenes), path(purple_baf_plot), path(purple_dir), path(virusbreakend_dir), path(dragen_hrd) + tuple val(meta), path(smlv_somatic_vcf), path(smlv_somatic_bcftools_stats), path(smlv_somatic_counts_process), path(sv_somatic_tsv), path(sv_somatic_vcf), path(cnv_somatic_tsv), path(af_global), path(af_keygenes), path(purple_baf_plot), path(purple_dir), path(virusbreakend_dir), path(dragen_hrd), path(smlv_somatic_mutpat), path(smlv_somatic_hrdetect), path(smlv_somatic_chord) path somatic_driver_panel path oncokb_genes @@ -35,6 +35,10 @@ process 
BOLT_OTHER_CANCER_REPORT { --smlv_somatic_bcftools_stats_fp \$(pwd)/${smlv_somatic_bcftools_stats} \\ --smlv_somatic_counts_process_fp \$(pwd)/${smlv_somatic_counts_process} \\ \\ + --mutpat_dir \$(pwd)/${smlv_somatic_mutpat} \\ + --hrdetect_file \$(pwd)/${smlv_somatic_hrdetect} \\ + --chord_file \$(pwd)/${smlv_somatic_chord} \\ + \\ --sv_somatic_tsv_fp \$(pwd)/${sv_somatic_tsv} \\ --sv_somatic_vcf_fp \$(pwd)/${sv_somatic_vcf} \\ --cnv_somatic_tsv_fp \$(pwd)/${cnv_somatic_tsv} \\ diff --git a/modules/local/bolt/other/multiqc_report/main.nf b/modules/local/bolt/other/multiqc_report/main.nf index f26d6921..a5752f09 100644 --- a/modules/local/bolt/other/multiqc_report/main.nf +++ b/modules/local/bolt/other/multiqc_report/main.nf @@ -2,7 +2,7 @@ process BOLT_OTHER_MULTIQC_REPORT { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-multiqc' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-multiqc' input: tuple val(meta), path(input_files) diff --git a/modules/local/bolt/other/purple_baf_plot/main.nf b/modules/local/bolt/other/purple_baf_plot/main.nf index c52d8202..6ccb318a 100644 --- a/modules/local/bolt/other/purple_baf_plot/main.nf +++ b/modules/local/bolt/other/purple_baf_plot/main.nf @@ -2,7 +2,7 @@ process BOLT_OTHER_PURPLE_BAF_PLOT { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-circos' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-circos' input: tuple val(meta), path(purple_dir) diff --git a/modules/local/bolt/smlv_germline/prepare/main.nf b/modules/local/bolt/smlv_germline/prepare/main.nf index 2fc3cc4a..615078dd 100644 --- a/modules/local/bolt/smlv_germline/prepare/main.nf +++ b/modules/local/bolt/smlv_germline/prepare/main.nf @@ -2,7 +2,7 @@ process BOLT_SMLV_GERMLINE_PREPARE { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20' input: tuple val(meta), path(smlv_vcf) diff --git a/modules/local/bolt/smlv_germline/report/main.nf 
b/modules/local/bolt/smlv_germline/report/main.nf index 3f5fdea9..0acfffde 100644 --- a/modules/local/bolt/smlv_germline/report/main.nf +++ b/modules/local/bolt/smlv_germline/report/main.nf @@ -2,12 +2,13 @@ process BOLT_SMLV_GERMLINE_REPORT { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-pcgr' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-pcgr' input: tuple val(meta), path(smlv_vcf), path(smlv_unfiltered_vcf) path germline_predisposition_panel_genes path pcgr_data_dir + path vep_dir output: tuple val(meta), path("output/*.variant_counts_type.yaml"), emit: counts_type @@ -32,6 +33,7 @@ process BOLT_SMLV_GERMLINE_REPORT { --pcgrr_conda pcgrr \\ --germline_panel_list_fp ${germline_predisposition_panel_genes} \\ --pcgr_data_dir ${pcgr_data_dir} \\ + --vep_dir ${vep_dir} \\ --threads ${task.cpus} \\ --output_dir output/ diff --git a/modules/local/bolt/smlv_somatic/annotate/main.nf b/modules/local/bolt/smlv_somatic/annotate/main.nf index d28aeeb4..f6bb82cc 100644 --- a/modules/local/bolt/smlv_somatic/annotate/main.nf +++ b/modules/local/bolt/smlv_somatic/annotate/main.nf @@ -2,7 +2,7 @@ process BOLT_SMLV_SOMATIC_ANNOTATE { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-pcgr' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-pcgr' input: tuple val(meta), path(smlv_vcf) @@ -10,6 +10,7 @@ process BOLT_SMLV_SOMATIC_ANNOTATE { path annotations_dir path pon_dir path pcgr_data_dir + path vep_dir output: tuple val(meta), path("output/${meta.tumor_id}.annotations.vcf.gz"), emit: vcf @@ -19,7 +20,7 @@ process BOLT_SMLV_SOMATIC_ANNOTATE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def chunk_size_arg = params.pcgr_variant_chunk_size ? 
"--pcgr_variant_chunk_size ${params.pcgr_variant_chunk_size}" : '' """ bolt smlv_somatic annotate \\ @@ -30,10 +31,12 @@ process BOLT_SMLV_SOMATIC_ANNOTATE { --annotations_dir ${annotations_dir} \\ --pon_dir ${pon_dir} \\ --pcgr_data_dir ${pcgr_data_dir} \\ + --vep_dir ${vep_dir} \\ --pcgr_conda pcgr \\ --pcgrr_conda pcgrr \\ --threads ${task.cpus} \\ - --output_dir output/ + --output_dir output/ \\ + ${chunk_size_arg} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/bolt/smlv_somatic/filter/main.nf b/modules/local/bolt/smlv_somatic/filter/main.nf index 93519a82..f76db5f8 100644 --- a/modules/local/bolt/smlv_somatic/filter/main.nf +++ b/modules/local/bolt/smlv_somatic/filter/main.nf @@ -2,7 +2,7 @@ process BOLT_SMLV_SOMATIC_FILTER { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20' input: tuple val(meta), path(smlv_vcf) diff --git a/modules/local/bolt/smlv_somatic/report/main.nf b/modules/local/bolt/smlv_somatic/report/main.nf index cbc160c1..15225680 100644 --- a/modules/local/bolt/smlv_somatic/report/main.nf +++ b/modules/local/bolt/smlv_somatic/report/main.nf @@ -2,11 +2,12 @@ process BOLT_SMLV_SOMATIC_REPORT { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-pcgr' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-pcgr' input: tuple val(meta), path(smlv_vcf), path(smlv_filters_vcf), path(smlv_dragen_vcf), path(purple_purity) path pcgr_data_dir + path vep_dir path somatic_driver_panel_regions_coding path giab_regions path genome_fasta @@ -19,7 +20,7 @@ process BOLT_SMLV_SOMATIC_REPORT { tuple val(meta), path("output/*.variant_counts_type.yaml") , emit: counts_type tuple val(meta), path("output/*.variant_counts_process.json"), emit: counts_process path 'output/pcgr/' , emit: pcgr_dir - path "output/*.pcgr_acmg.grch38.html" , emit: pcgr_report + path "output/*.pcgr.grch38.html" , emit: pcgr_report path 'versions.yml' , emit: versions when: 
@@ -40,6 +41,7 @@ process BOLT_SMLV_SOMATIC_REPORT { --pcgr_conda pcgr \\ --pcgrr_conda pcgrr \\ --pcgr_data_dir ${pcgr_data_dir} \\ + --vep_dir ${vep_dir} \\ --purple_purity_fp ${purple_purity} \\ \\ --cancer_genes_fp ${somatic_driver_panel_regions_coding} \\ @@ -49,7 +51,7 @@ process BOLT_SMLV_SOMATIC_REPORT { --threads ${task.cpus} \\ --output_dir output/ - mv output/pcgr/${meta.tumor_id}.pcgr_acmg.grch38.html output/ + mv output/pcgr/${meta.tumor_id}.pcgr.grch38.html output/ cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -66,7 +68,7 @@ process BOLT_SMLV_SOMATIC_REPORT { touch output/${meta.tumor_id}.somatic.variant_counts_type.yaml touch output/${meta.tumor_id}.somatic.variant_counts_process.json touch output/${meta.tumor_id}.somatic.bcftools_stats.txt - touch output/${meta.tumor_id}.pcgr_acmg.grch38.html + touch output/${meta.tumor_id}.pcgr.grch38.html echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/bolt/smlv_somatic/rescue/main.nf b/modules/local/bolt/smlv_somatic/rescue/main.nf index 2e1ac64b..f274231d 100644 --- a/modules/local/bolt/smlv_somatic/rescue/main.nf +++ b/modules/local/bolt/smlv_somatic/rescue/main.nf @@ -2,7 +2,7 @@ process BOLT_SMLV_SOMATIC_RESCUE { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20' input: tuple val(meta), path(smlv_vcf), path(smlv_tbi), path(sage_smlv_vcf), path(sage_smlv_tbi) diff --git a/modules/local/bolt/sv_somatic/annotate/main.nf b/modules/local/bolt/sv_somatic/annotate/main.nf index af48b4f5..ecd27b5c 100644 --- a/modules/local/bolt/sv_somatic/annotate/main.nf +++ b/modules/local/bolt/sv_somatic/annotate/main.nf @@ -2,7 +2,7 @@ process BOLT_SV_SOMATIC_ANNOTATE { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17-snpeff' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20-snpeff' input: tuple val(meta), path(sv_vcf), path(cnv_tsv) diff --git 
a/modules/local/bolt/sv_somatic/prioritise/main.nf b/modules/local/bolt/sv_somatic/prioritise/main.nf index 710cb192..4f78b51b 100644 --- a/modules/local/bolt/sv_somatic/prioritise/main.nf +++ b/modules/local/bolt/sv_somatic/prioritise/main.nf @@ -2,7 +2,7 @@ process BOLT_SV_SOMATIC_PRIORITISE { tag "${meta.id}" label 'process_low' - container 'ghcr.io/umccr/bolt:0.2.17' + container 'ghcr.io/umccr/bolt:0.3.0-dev-20' input: tuple val(meta), path(sv_vcf) diff --git a/modules/local/custom/extract_tarball/main.nf b/modules/local/custom/extract_tarball/main.nf new file mode 100644 index 00000000..1ebc1011 --- /dev/null +++ b/modules/local/custom/extract_tarball/main.nf @@ -0,0 +1,33 @@ +process CUSTOM_EXTRACTTARBALL { + label 'process_single' + + conda "conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'quay.io/nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(tarball) + + output: + path "${meta.id}/", emit: extracted_dir + path '.command.*', emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def strip = meta.strip_components != null ? meta.strip_components : 1 + def target = meta.subdir ? "${meta.id}/${meta.subdir}" : "${meta.id}" + + """ + mkdir -p ${target} + tar ${args} -xzvf ${tarball} --strip-components ${strip} -C ${target}/ + """ + + stub: + """ + mkdir -p ${meta.id}/ + """ +} diff --git a/modules/local/pave/somatic/meta.yml b/modules/local/pave/somatic/meta.yml index 97aa6729..552537c4 100644 --- a/modules/local/pave/somatic/meta.yml +++ b/modules/local/pave/somatic/meta.yml @@ -12,7 +12,7 @@ tools: description: Annotates small variant VCF with gene, transcript coding and protein effects. 
homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave - licence: ['GPL v3'] + licence: ["GPL v3"] input: - meta: type: map diff --git a/modules/local/sigrap/chord/main.nf b/modules/local/sigrap/chord/main.nf new file mode 100644 index 00000000..44810f60 --- /dev/null +++ b/modules/local/sigrap/chord/main.nf @@ -0,0 +1,37 @@ +process SIGRAP_CHORD { + tag "${meta.id}" + label 'process_low' + + container 'docker.io/qclayssen/sigrap:0.2.0-dev-7' + + input: + tuple val(meta), path(chord_prediction_tsv) + + output: + tuple val(meta), path('chord.json.gz') , emit: chord_json + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + sigrap.R chord \\ + --sample ${meta.id} \\ + --chord ${chord_prediction_tsv} \\ + --out chord.json.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sigrap: \$(sigrap.R --version | sed 's/^.*version //') + END_VERSIONS + """ + + stub: + """ + touch chord.json.gz + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sigrap/hrdetect/main.nf b/modules/local/sigrap/hrdetect/main.nf new file mode 100644 index 00000000..8cb5fa57 --- /dev/null +++ b/modules/local/sigrap/hrdetect/main.nf @@ -0,0 +1,39 @@ +process SIGRAP_HRDETECT { + tag "${meta.id}" + label 'process_low' + + container 'docker.io/qclayssen/sigrap:0.2.0-dev-7' + + input: + tuple val(meta), path(smlv_somatic_vcf), path(sv_somatic_vcf), path(cnv_somatic_tsv) + + output: + tuple val(meta), path('hrdetect.json.gz') , emit: hrdetect_json + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + sigrap.R hrdetect \\ + --sample ${meta.id} \\ + --snv ${smlv_somatic_vcf} \\ + --sv ${sv_somatic_vcf} \\ + --cnv ${cnv_somatic_tsv} \\ + --out hrdetect.json.gz + + cat 
<<-END_VERSIONS > versions.yml + "${task.process}": + sigrap: \$(sigrap.R --version | sed 's/^.*version //') + END_VERSIONS + """ + + stub: + """ + touch hrdetect.json.gz + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/sigrap/mutpat/main.nf b/modules/local/sigrap/mutpat/main.nf new file mode 100644 index 00000000..b4b34102 --- /dev/null +++ b/modules/local/sigrap/mutpat/main.nf @@ -0,0 +1,42 @@ +process SIGRAP_MUTPAT { + tag "${meta.id}" + label 'process_low' + + container 'docker.io/qclayssen/sigrap:0.2.0-dev-7' + + input: + tuple val(meta), path(smlv_somatic_vcf) + + output: + tuple val(meta), path('mutpat/') , emit: mutpat_output + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + sigrap.R mutpat \\ + --sample ${meta.id} \\ + --snv ${smlv_somatic_vcf} \\ + --rainfall \\ + --strand-bias \\ + --predefined-dbs-mbs \\ + --out mutpat/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sigrap: \$(sigrap.R --version | sed 's/^.*version //') + END_VERSIONS + """ + + stub: + // Stub must create the same 'mutpat/' directory that the output block declares + """ + mkdir -p mutpat/ + touch mutpat/stub_output + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/vcf2maf/main.nf b/modules/local/vcf2maf/main.nf new file mode 100644 index 00000000..707f7510 --- /dev/null +++ b/modules/local/vcf2maf/main.nf @@ -0,0 +1,50 @@ +process VCF2MAF { + tag "${meta.id}" + label 'process_medium' + + container 'quay.io/biocontainers/vcf2maf:1.6.22--hdfd78af_0' + + input: + tuple val(meta), path(vcf) + path genome_fasta + + + output: + tuple val(meta), path("*.maf"), emit: maf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + gunzip -c ${vcf} > ${meta.id}-temp.vcf + + vcf2maf.pl \\ + --inhibit-vep \\ + --input-vcf ${meta.id}-temp.vcf \\ + --output-maf 
${meta.id}.maf \\ + --ref-fasta ${genome_fasta} \\ + --tumor-id ${meta.tumor_id} \\ + --normal-id ${meta.normal_id} \\ + --ncbi-build "GRCh38" \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcf2maf: \$(vcf2maf.pl --help | grep -o 'vcf2maf [0-9.]*' | sed 's/vcf2maf //' || echo "1.6.22") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // versions.yml is written with echo here, so no heredoc terminator follows it + """ + touch ${prefix}.maf + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/nextflow.config b/nextflow.config index 00fc8aaf..479336a7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,6 +25,7 @@ params { validate_params = true show_hidden_params = false schema_ignore_params = 'hmfdata_paths,umccrdata_paths,miscdata_paths,data_versions,genome' + pcgr_variant_chunk_size = null // Max resource options // Defaults only, expecting to be overwritten @@ -143,6 +144,7 @@ env { R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" JULIA_DEPOT_PATH = "/usr/local/share/julia" + XDG_CACHE_HOME = "/tmp/quarto_cache_home" } // Capture exit codes from upstream processes when piping @@ -174,7 +176,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=22.10.6' version = '0.6.3' - doi = '' + doi = 'https://doi.org/10.5281/zenodo.15833493' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 47c839df..3d34050c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -122,14 +119,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output 
directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -179,6 +169,12 @@ "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "pcgr_variant_chunk_size": { + "type": "integer", + "description": "Override maximum variants per PCGR chunk for the BOLT somatic annotate step.", + "help_text": "Leave unset to use the default chunk size defined by BOLT. Provide an integer to adjust PCGR chunking behaviour when needed.", + "fa_icon": "fas fa-database" } } } diff --git a/pipeline_template.yml b/pipeline_template.yml index c8064235..9e816eb2 100644 --- a/pipeline_template.yml +++ b/pipeline_template.yml @@ -1,7 +1,7 @@ prefix: umccr skip: -- github -- ci -- github_badges -- igenomes -- nf_core_configs + - github + - ci + - github_badges + - igenomes + - nf_core_configs diff --git a/subworkflows/local/prepare_input.nf b/subworkflows/local/prepare_input.nf index 244ed699..ac3993c2 100644 --- a/subworkflows/local/prepare_input.nf +++ b/subworkflows/local/prepare_input.nf @@ -124,6 +124,13 @@ workflow PREPARE_INPUT { return [meta, virusbreakend_dir] } + // CHORD: homologous recombination deficiency prediction + // channel: [ meta, chord_prediction_tsv ] + ch_chord = ch_metas.map { meta -> + def base = file(meta.oncoanalyser_dir).toUriString() + return [meta, "${base}/chord/${meta.tumor_id}.chord.prediction.tsv"] + } + // HRD: homologous recombination deficiency scores // channel: [ meta, hrdscore_csv ] 
ch_input_hrd = ch_metas.map { meta -> @@ -173,6 +180,7 @@ workflow PREPARE_INPUT { cobalt = ch_cobalt // channel: [ meta, cobalt_dir ] sage_somatic = ch_sage_somatic // channel: [ meta, sage_somatic_vcf, sage_somatic_tbi ] virusbreakend = ch_virusbreakend // channel: [ meta, virusbreakend_dir ] + chord = ch_chord // channel: [ meta, chord_prediction_tsv ] call_inputs = ch_call_inputs // channel: [ meta_esvee, esvee_ref_depth_vcf, esvee_prep_dir ] // DRAGEN channels diff --git a/subworkflows/local/prepare_reference.nf b/subworkflows/local/prepare_reference.nf index 7ef91a5a..26115d7d 100644 --- a/subworkflows/local/prepare_reference.nf +++ b/subworkflows/local/prepare_reference.nf @@ -2,6 +2,8 @@ // Prepare reference data as required // +include { CUSTOM_EXTRACTTARBALL as DECOMP_MISC_DATA } from '../../modules/local/custom/extract_tarball/main' + workflow PREPARE_REFERENCE { take: @@ -18,6 +20,29 @@ workflow PREPARE_REFERENCE { ch_umccr_data = createDataMap(params.umccrdata_paths, umccr_reference_data_path) ch_misc_data = createDataMap(params.miscdata_paths, params.ref_data_path) + // + // Extract tarball resources (e.g. 
PCGR data, VEP cache) when provided as .tar.gz/.tgz + // + misc_tarball_inputs = getTarballInputs(params.miscdata_paths, params.ref_data_path) + if (misc_tarball_inputs) { + ch_misc_data_inputs = Channel.fromList(misc_tarball_inputs) + + DECOMP_MISC_DATA(ch_misc_data_inputs) + + ch_misc_data_extracted = DECOMP_MISC_DATA.out.extracted_dir + .collect() + .map { dir_list -> + // Convert list of directories to a map of [name: dir] + def extracted_map = dir_list.collectEntries { dir -> + [(dir.getFileName().toString()): dir] + } + // Merge extracted data with existing misc_data map + return createDataMap(params.miscdata_paths, params.ref_data_path) + extracted_map + } + + ch_misc_data = ch_misc_data_extracted + } + // // Prepare genome paths and info // @@ -45,6 +70,34 @@ def createDataMap(entries, ref_data_base_path) { } } +def getTarballInputs(entries, ref_data_base_path) { + return entries + .findAll { name, relpath -> + if (!relpath) { + return false + } + def rel = relpath.toString() + return rel.endsWith('.tar.gz') || rel.endsWith('.tgz') + } + .collect { name, relpath -> + def tarball = joinPath(ref_data_base_path, relpath) + def meta + if (name == 'vep_dir') { + // VEP cache: strip wrapper dir, extract into homo_sapiens subdir + // Result: vep_dir/homo_sapiens/113_GRCh38/ + meta = [id: name, strip_components: 1, subdir: 'homo_sapiens'] + } else if (name == 'pcgr_dir') { + // PCGR bundle: don't strip, tarball contains data/ directory + // Result: pcgr_dir/data/grch38/ + meta = [id: name, strip_components: 0] + } else { + // Default: strip top-level wrapper directory + meta = [id: name, strip_components: 1] + } + return [meta, tarball] + } +} + def joinPath(a, b) { def a_noslash = file(a).toUriString().replaceAll('/$', '') return file("${a_noslash}/${b}", checkIfExists: true) diff --git a/workflows/sash.nf b/workflows/sash.nf index eb1bbb72..bad9c60d 100644 --- a/workflows/sash.nf +++ b/workflows/sash.nf @@ -42,13 +42,17 @@ include { BOLT_SMLV_GERMLINE_PREPARE } 
from '../modules/local/bolt/smlv_germline include { BOLT_SMLV_GERMLINE_REPORT } from '../modules/local/bolt/smlv_germline/report/main' include { BOLT_SMLV_SOMATIC_ANNOTATE } from '../modules/local/bolt/smlv_somatic/annotate/main' include { BOLT_SMLV_SOMATIC_FILTER } from '../modules/local/bolt/smlv_somatic/filter/main' -include { BOLT_SMLV_SOMATIC_REPORT } from '../modules/local/bolt/smlv_somatic/report/main' include { BOLT_SMLV_SOMATIC_RESCUE } from '../modules/local/bolt/smlv_somatic/rescue/main' +include { BOLT_SMLV_SOMATIC_REPORT } from '../modules/local/bolt/smlv_somatic/report/main' include { BOLT_SV_SOMATIC_ANNOTATE } from '../modules/local/bolt/sv_somatic/annotate/main' include { BOLT_SV_SOMATIC_PRIORITISE } from '../modules/local/bolt/sv_somatic/prioritise/main' -include { ESVEE_CALL } from '../modules/local/esvee/call/main' include { PAVE_SOMATIC } from '../modules/local/pave/somatic/main' +include { SIGRAP_HRDETECT } from '../modules/local/sigrap/hrdetect/main' +include { SIGRAP_MUTPAT } from '../modules/local/sigrap/mutpat/main' +include { VCF2MAF } from '../modules/local/vcf2maf/main' + +include { ESVEE_CALL } from '../modules/local/esvee/call/main' include { LINX_ANNOTATION } from '../subworkflows/local/linx_annotation' include { LINX_PLOTTING } from '../subworkflows/local/linx_plotting' include { PREPARE_INPUT } from '../subworkflows/local/prepare_input' @@ -74,13 +78,6 @@ workflow SASH { // channel: [ versions.yml ] ch_versions = Channel.empty() - - - - // - // Prepare inputs from samplesheet - // - PREPARE_INPUT( file(params.input), ) @@ -93,6 +90,7 @@ workflow SASH { ch_sage_somatic = PREPARE_INPUT.out.sage_somatic // channel: [ meta, sage_somatic_vcf, sage_somatic_tbi ] ch_virusbreakend = PREPARE_INPUT.out.virusbreakend // channel: [ meta, virusbreakend_dir ] ch_call_inputs = PREPARE_INPUT.out.call_inputs // channel: [ meta_esvee, esvee_ref_depth_vcf, esvee_prep_dir ] + ch_chord = PREPARE_INPUT.out.chord // channel: [ meta, chord_prediction_tsv ] 
// DRAGEN inputs ch_input_hrd = PREPARE_INPUT.out.hrd // channel: [ meta, hrdscore_csv ] @@ -120,8 +118,7 @@ workflow SASH { // Somatic small variants // - - + // Prepare rescue inputs with meta transformation // channel: [ meta_bolt, dragen_somatic_vcf, dragen_somatic_tbi, sage_somatic_vcf, sage_somatic_tbi ] ch_smlv_somatic_rescue_inputs = WorkflowSash.groupByMeta( ch_input_vcf_somatic, @@ -152,6 +149,7 @@ workflow SASH { umccr_data.annotations_dir, misc_data.pon_dir, misc_data.pcgr_dir, + misc_data.vep_dir ) ch_versions = ch_versions.mix(BOLT_SMLV_SOMATIC_ANNOTATE.out.versions) @@ -162,6 +160,7 @@ workflow SASH { ch_versions = ch_versions.mix(BOLT_SMLV_SOMATIC_FILTER.out.versions) + // Restore meta and create clean outputs // channel: [ meta, smlv_somatic_vcf ] ch_smlv_somatic_out = WorkflowSash.restoreMeta(BOLT_SMLV_SOMATIC_FILTER.out.vcf, ch_inputs) .map { meta, vcf, tbi -> [meta, vcf] } @@ -181,12 +180,42 @@ workflow SASH { hmf_data.gnomad_resource, ) + ch_versions = ch_versions.mix(PAVE_SOMATIC.out.versions) + // channel: [ meta, pave_somatic_vcf ] ch_pave_somatic_out = WorkflowSash.restoreMeta(PAVE_SOMATIC.out.vcf, ch_inputs) + // + // Convert somatic VCF to MAF format + // + + // channel: [ meta_vcf2maf, smlv_somatic_vcf ] + ch_vcf2maf_inputs = ch_smlv_somatic_out.map { meta, vcf -> + def meta_vcf2maf = [ + key: meta.id, + id: meta.id, + tumor_id: meta.tumor_id, + normal_id: meta.normal_id, + ] + return [meta_vcf2maf, vcf] + } + + VCF2MAF( + ch_vcf2maf_inputs, + genome.fasta + ) + + ch_versions = ch_versions.mix(VCF2MAF.out.versions) + + // channel: [ meta, somatic_maf ] + ch_vcf2maf_out = VCF2MAF.out.maf + + + + // // Germline small variants // @@ -219,7 +248,6 @@ workflow SASH { // // Somatic structural variants - // ESVEE_CALL( @@ -304,6 +332,7 @@ workflow SASH { BOLT_SMLV_SOMATIC_REPORT( ch_smlv_somatic_report_inputs, misc_data.pcgr_dir, + misc_data.vep_dir, umccr_data.somatic_panel_regions_cds, hmf_data.sage_highconf_regions, genome.fasta, @@ -330,6 
+359,7 @@ workflow SASH { ch_smlv_germline_report_inputs, umccr_data.germline_panel_genes, misc_data.pcgr_dir, + misc_data.vep_dir ) ch_versions = ch_versions.mix(BOLT_SMLV_GERMLINE_REPORT.out.versions) @@ -430,11 +460,60 @@ workflow SASH { + // + // Sigrap + // + + // channel: [ meta_sigrap, smlv_somatic_vcf, sv_somatic_vcf, cnv_somatic_tsv ] + ch_sigrap_hrdetect_inputs = WorkflowSash.groupByMeta( + ch_smlv_somatic_out, + ch_sv_somatic_sv_vcf_out, + ch_sv_somatic_cnv_tsv_out, + ) + .map { meta, smlv_vcf, sv_vcf, cnv_tsv -> + def meta_sigrap = [ + key: meta.id, + id: meta.id, + tumor_id: meta.tumor_id, + ] + return [meta_sigrap, smlv_vcf, sv_vcf, cnv_tsv] + } + + SIGRAP_HRDETECT( + ch_sigrap_hrdetect_inputs + ) + + // channel: [ meta, hrdetect_json ] + ch_sigrap_hrdetect = WorkflowSash.restoreMeta(SIGRAP_HRDETECT.out.hrdetect_json, ch_inputs) + ch_versions = ch_versions.mix(SIGRAP_HRDETECT.out.versions) + + // channel: [ meta_sigrap, smlv_somatic_vcf ] + ch_sigrap_mutpat_inputs = ch_smlv_somatic_out.map { meta, vcf -> + def meta_sigrap = [ + key: meta.id, + id: meta.id, + tumor_id: meta.tumor_id, + ] + return [meta_sigrap, vcf] + } + + SIGRAP_MUTPAT( + ch_sigrap_mutpat_inputs + ) + + // channel: [ meta, mutpat_output ] + ch_sigrap_mutpat = WorkflowSash.restoreMeta(SIGRAP_MUTPAT.out.mutpat_output, ch_inputs) + ch_versions = ch_versions.mix(SIGRAP_MUTPAT.out.versions) + + + + + // // Generate the cancer report // - // channel: [ meta_bolt, smlv_somatic_vcf, smlv_somatic_bcftools_stats, smlv_somatic_counts_process, sv_tsv, sv_vcf, cnv_tsv, af_global, af_keygenes, purple_baf_circos_plot, purple_dir, virusbreakend_dir, dragen_hrd ] + // channel: [ meta_bolt, smlv_somatic_vcf, smlv_somatic_bcftools_stats, smlv_somatic_counts_process, sv_tsv, sv_vcf, cnv_tsv, af_global, af_keygenes, purple_baf_circos_plot, purple_dir, virusbreakend_dir, dragen_hrd, mutpat, hrdetect, chord ] ch_cancer_report_inputs = WorkflowSash.groupByMeta( ch_smlv_somatic_out, 
ch_smlv_somatic_report_stats_out, @@ -448,6 +527,9 @@ workflow SASH { PURPLE_CALLING.out.purple_dir, ch_virusbreakend, ch_input_hrd, + ch_sigrap_mutpat, + ch_sigrap_hrdetect, + ch_chord, flatten_mode: 'nonrecursive', ) .map { @@ -478,11 +560,11 @@ workflow SASH { // channel: [ meta, somatic_dragen_dir ] ch_input_dragen_somatic_dir = ch_inputs - .map { meta -> [meta, meta.dragen_somatic_dir] } + .map { meta -> [meta, file(meta.dragen_somatic_dir)] } // channel: [ meta, germline_dragen_dir ] ch_input_dragen_germline_dir = ch_inputs - .map { meta -> [meta, meta.dragen_germline_dir] } + .map { meta -> [meta, file(meta.dragen_germline_dir)] } // channel: [ meta_multiqc, [somatic_dragen_dir, germline_dragen_dir, somatic_bcftools_stats, germline_bcftools_stats, somatic_counts_type, germline_counts_type, purple_dir] ] ch_multiqc_report_inputs = WorkflowSash.groupByMeta( @@ -518,7 +600,7 @@ workflow SASH { // - // Annotate post processed strucutral variant events + // Annotate post processed structural variant events // LINX_ANNOTATION( @@ -542,7 +624,6 @@ workflow SASH { - // // TASK: Aggregate software versions //