From c2ed3d4e4d14a8d13bb5476826cea65bd5672016 Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 18 May 2026 11:35:01 -0400 Subject: [PATCH 1/4] feat: make FASTQ analysis optional --- workflows/qc/quality-check-standard.wdl | 199 +++++++++++++----------- 1 file changed, 104 insertions(+), 95 deletions(-) diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 8d80458c1..395d190f6 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -118,6 +118,10 @@ workflow quality_check_standard { description: "Only process a random sampling of approximately `n` reads. Any `n <= 0` for processing entire input.", warning: "Subsampling is done probabalistically so the exact number of reads in the output will have some variation.", } + run_fastq_analysis: { + description: "Create FASTQs from the input BAM and run FASTQ-level analyses?", + help: "If false, the pipeline skips SAMtools bam-to-fastq, fqlint, Kraken2, fastp, librarian, and comparative Kraken2. 
Also disables qualimap_rnaseq (requires a collated BAM from bam_to_fastq).", + } } input { @@ -153,6 +157,7 @@ workflow quality_check_standard { Boolean use_all_cores = false Int optical_distance = 0 Int subsample_n_reads = -1 + Boolean run_fastq_analysis = true } call parse_input { input: @@ -164,7 +169,7 @@ workflow quality_check_standard { call flag_filter.validate_flag_filter as kraken_filter_validator { input: flags = standard_filter, } - if (run_comparative_kraken) { + if (run_comparative_kraken && run_fastq_analysis) { call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: flags = comparative_filter, } @@ -254,109 +259,111 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: - bam = post_subsample_bam, - bitwise_filter = standard_filter, - prefix = post_subsample_prefix, - # RNA needs a collated BAM for Qualimap - # DNA can skip the associated storage costs - retain_collated_bam = rna, - # disabling fast_mode enables writing of secondary and supplementary alignments - # to the collated BAM when processing RNA. - # Those alignments are used downstream by Qualimap. 
- fast_mode = (!rna), - paired_end = true, # matches default but prevents user from overriding - use_all_cores, - } - - call fq.fqlint { input: - read_one_fastq = select_first([ - bam_to_fastq.read_one_fastq_gz, - "undefined", - ]), - read_two_fastq = select_first([ - bam_to_fastq.read_two_fastq_gz, - "undefined", - ]), - } - call kraken2.kraken after fqlint { input: - read_one_fastq_gz = select_first([ - bam_to_fastq.read_one_fastq_gz, - "undefined", - ]), - read_two_fastq_gz = select_first([ - bam_to_fastq.read_two_fastq_gz, - "undefined", - ]), - db = kraken_db, - store_sequences = store_kraken_sequences, - prefix = post_subsample_prefix, - use_all_cores, - } - if (run_fastp) { - call fp.fastp after fqlint { input: - read_one_fastq = select_first([ - bam_to_fastq.read_one_fastq_gz, - "undefined", - ]), - read_two_fastq = select_first([ - bam_to_fastq.read_two_fastq_gz, - "undefined", - ]), - output_fastq = false, - } - } - if (run_librarian) { - call libraran_tasks.librarian after fqlint { input: - read_one_fastq = select_first([ - bam_to_fastq.read_one_fastq_gz, - "undefined", - ]), - } - } - - if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { - input: + if (run_fastq_analysis) { + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, - bitwise_filter = comparative_filter, - prefix = post_subsample_prefix + ".alt_filtered", - # matches default but prevents user from overriding - # If the user wants a collated BAM, they should save the one - # from the first bam_to_fastq call. - retain_collated_bam = false, - # matches default but prevents user from overriding - # Since the only output here is FASTQs, we can disable fast mode. - # This discards secondary and supplementary alignments, which should not - # be converted to FASTQs. (Is that true?) 
- fast_mode = true, + bitwise_filter = standard_filter, + prefix = post_subsample_prefix, + # RNA needs a collated BAM for Qualimap + # DNA can skip the associated storage costs + retain_collated_bam = rna, + # disabling fast_mode enables writing of secondary and supplementary alignments + # to the collated BAM when processing RNA. + # Those alignments are used downstream by Qualimap. + fast_mode = (!rna), paired_end = true, # matches default but prevents user from overriding use_all_cores, } - call fq.fqlint as alt_filtered_fqlint { input: + + call fq.fqlint { input: read_one_fastq = select_first([ - alt_filtered_fastq.read_one_fastq_gz, + bam_to_fastq.read_one_fastq_gz, "undefined", ]), read_two_fastq = select_first([ - alt_filtered_fastq.read_two_fastq_gz, + bam_to_fastq.read_two_fastq_gz, "undefined", ]), } - call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: + call kraken2.kraken after fqlint { input: read_one_fastq_gz = select_first([ - alt_filtered_fastq.read_one_fastq_gz, + bam_to_fastq.read_one_fastq_gz, "undefined", ]), read_two_fastq_gz = select_first([ - alt_filtered_fastq.read_two_fastq_gz, + bam_to_fastq.read_two_fastq_gz, "undefined", ]), db = kraken_db, store_sequences = store_kraken_sequences, - prefix = post_subsample_prefix + ".alt_filtered", + prefix = post_subsample_prefix, use_all_cores, } + if (run_fastp) { + call fp.fastp after fqlint { input: + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), + output_fastq = false, + } + } + if (run_librarian) { + call libraran_tasks.librarian after fqlint { input: + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + } + } + + if (run_comparative_kraken) { + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { + input: + bam = post_subsample_bam, + bitwise_filter 
= comparative_filter, + prefix = post_subsample_prefix + ".alt_filtered", + # matches default but prevents user from overriding + # If the user wants a collated BAM, they should save the one + # from the first bam_to_fastq call. + retain_collated_bam = false, + # matches default but prevents user from overriding + # Since the only output here is FASTQs, we can enable fast mode. + # This discards secondary and supplementary alignments, which should not + # be converted to FASTQs. (Is that true?) + fast_mode = true, + paired_end = true, # matches default but prevents user from overriding + use_all_cores, + } + call fq.fqlint as alt_filtered_fqlint { input: + read_one_fastq = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), + } + call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: + read_one_fastq_gz = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), + db = kraken_db, + store_sequences = store_kraken_sequences, + prefix = post_subsample_prefix + ".alt_filtered", + use_all_cores, + } + } } call mosdepth.coverage as wg_coverage after quickcheck { input: @@ -392,18 +399,20 @@ workflow quality_check_standard { ]), outfile_name = post_subsample_prefix + ".strandedness.tsv", } - call qualimap.rnaseq as qualimap_rnaseq { input: - bam = select_first([ - bam_to_fastq.collated_bam, - "undefined", - ]), - prefix = post_subsample_prefix + ".qualimap_rnaseq_results", - gtf = select_first([ - gtf, - "undefined", - ]), - name_sorted = true, - paired_end = true, # matches default but prevents user from overriding + if (run_fastq_analysis) { + call qualimap.rnaseq as qualimap_rnaseq { input: + bam = select_first([ + bam_to_fastq.collated_bam, + "undefined", + ]), + prefix = post_subsample_prefix + 
".qualimap_rnaseq_results", + gtf = select_first([ + gtf, + "undefined", + ]), + name_sorted = true, + paired_end = true, # matches default but prevents user from overriding + } } } if (mark_duplicates) { From 4c1e1115831153802050e96378864218f2aaace3 Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 18 May 2026 11:39:45 -0400 Subject: [PATCH 2/4] chore: exceptions for tasks with no outputs --- data_structures/flag_filter.wdl | 1 + data_structures/read_group.wdl | 1 + tools/fq.wdl | 1 + tools/samtools.wdl | 1 + tools/util.wdl | 2 ++ workflows/dnaseq/dnaseq-standard.wdl | 1 + workflows/rnaseq/rnaseq-standard.wdl | 1 + 7 files changed, 8 insertions(+) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 3e23f724a..dcbf73e8f 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -67,6 +67,7 @@ struct FlagFilter { String exclude_if_all # samtools -G } +#@ except: EmptyOutputs task validate_string_is_12bit_int { meta { description: "Validates that a string is a octal, decimal, or hexadecimal number and less than 2^12." 
diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 94632b4c6..bde09af24 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -143,6 +143,7 @@ task get_read_groups { } } +#@ except: EmptyOutputs task validate_read_group { meta { description: "Validate a `ReadGroup` struct's fields are defined and well-formed" diff --git a/tools/fq.wdl b/tools/fq.wdl index bffb332be..e2d5c566a 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -1,6 +1,7 @@ ## [Homepage](https://github.com/stjude-rust-labs/fq) version 1.1 +#@ except: EmptyOutputs task fqlint { meta { description: "Performs quality control on the input FASTQs to ensure proper formatting" diff --git a/tools/samtools.wdl b/tools/samtools.wdl index ace383ece..d0ad37742 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -3,6 +3,7 @@ version 1.1 import "../data_structures/flag_filter.wdl" +#@ except: EmptyOutputs task quickcheck { meta { description: "Runs Samtools quickcheck on the input BAM file." 
diff --git a/tools/util.wdl b/tools/util.wdl index 319d72ed9..2a82680c8 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -132,6 +132,7 @@ task calc_feature_lengths { } } +#@ except: EmptyOutputs task compression_integrity { meta { description: "Checks the compression integrity of a bgzipped file" @@ -358,6 +359,7 @@ task global_phred_scores { } } +#@ except: EmptyOutputs task check_fastq_and_rg_concordance { meta { description: "Validates FASTQs and read group records are concordant" diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index f0a481509..faa9be835 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -133,6 +133,7 @@ workflow dnaseq_standard_experimental { } } +#@ except: EmptyOutputs task parse_input { meta { description: "Parses and validates the `dnaseq_standard` workflow's provided inputs" diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index ae5c53641..d100a9af7 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -141,6 +141,7 @@ workflow rnaseq_standard { } } +#@ except: EmptyOutputs task parse_input { meta { description: "Parses and validates the `rnaseq_standard[_fastq]` workflows' provided inputs" From cab4b11ff6da81ff29f8ad4736442dcf916126c1 Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 18 May 2026 11:39:58 -0400 Subject: [PATCH 3/4] chore: update CHANGELOG --- workflows/qc/CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/workflows/qc/CHANGELOG.md b/workflows/qc/CHANGELOG.md index 57fe79cb8..876359a4e 100644 --- a/workflows/qc/CHANGELOG.md +++ b/workflows/qc/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/). 
+## 2026 May + +### Added + +- `quality_check_standard` workflow: optional FASTQ analysis via new input `run_fastq_analysis` (default `true`), allowing callers to skip BAM-to-FASTQ conversion and FASTQ-level tools (fqlint, Kraken2, fastp, librarian, comparative Kraken2). Setting it to `false` also disables `qualimap_rnaseq`, which requires the collated BAM from `bam_to_fastq` ([#315](https://github.com/stjudecloud/workflows/pull/315)) + ## 2025 September ### Changed From 8263a44c9faae7883c9ea4472feda7ec715f6b4d Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 18 May 2026 11:43:01 -0400 Subject: [PATCH 4/4] chore: format --- workflows/qc/quality-check-standard.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 395d190f6..09a3d215b 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -109,6 +109,10 @@ workflow quality_check_standard { warning: "These files can be very large.", } use_all_cores: "Use all cores? Recommended for cloud environments." + run_fastq_analysis: { + description: "Create FASTQs from the input BAM and run FASTQ-level analyses?", + help: "If false, the pipeline skips SAMtools bam-to-fastq, fqlint, Kraken2, fastp, librarian, and comparative Kraken2. Also disables qualimap_rnaseq (requires a collated BAM from bam_to_fastq).", + } optical_distance: { description: "Maximum distance between read coordinates to consider them optical duplicates instead of library duplicates (e.g. PCR duplicates).", help: "If `mark_duplicates == false`, this parameter is ignored. If `0`, then _optical_ duplicate marking is disabled and only traditional duplicate marking will be performed. Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Review the `mark_duplicates` task in `../../tools/picard.wdl` for more information.", @@ -118,10 +122,6 @@ workflow quality_check_standard { description: "Only process a random sampling of approximately `n` reads. 
Any `n <= 0` for processing entire input.", warning: "Subsampling is done probabalistically so the exact number of reads in the output will have some variation.", } - run_fastq_analysis: { - description: "Create FASTQs from the input BAM and run FASTQ-level analyses?", - help: "If false, the pipeline skips SAMtools bam-to-fastq, fqlint, Kraken2, fastp, librarian, and comparative Kraken2. Also disables qualimap_rnaseq (requires a collated BAM from bam_to_fastq).", - } } input { @@ -155,9 +155,9 @@ workflow quality_check_standard { Boolean store_kraken_sequences = false Boolean output_intermediate_files = false Boolean use_all_cores = false + Boolean run_fastq_analysis = true Int optical_distance = 0 Int subsample_n_reads = -1 - Boolean run_fastq_analysis = true } call parse_input { input: