Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data_structures/flag_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct FlagFilter {
String exclude_if_all # samtools -G
}

#@ except: EmptyOutputs
task validate_string_is_12bit_int {
meta {
description: "Validates that a string is an octal, decimal, or hexadecimal number and less than 2^12."
Expand Down
1 change: 1 addition & 0 deletions data_structures/read_group.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ task get_read_groups {
}
}

#@ except: EmptyOutputs
task validate_read_group {
meta {
description: "Validate a `ReadGroup` struct's fields are defined and well-formed"
Expand Down
1 change: 1 addition & 0 deletions tools/fq.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## [Homepage](https://github.com/stjude-rust-labs/fq)
version 1.1

#@ except: EmptyOutputs
task fqlint {
meta {
description: "Performs quality control on the input FASTQs to ensure proper formatting"
Expand Down
1 change: 1 addition & 0 deletions tools/samtools.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ version 1.1

import "../data_structures/flag_filter.wdl"

#@ except: EmptyOutputs
task quickcheck {
meta {
description: "Runs Samtools quickcheck on the input BAM file."
Expand Down
2 changes: 2 additions & 0 deletions tools/util.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ task calc_feature_lengths {
}
}

#@ except: EmptyOutputs
task compression_integrity {
meta {
description: "Checks the compression integrity of a bgzipped file"
Expand Down Expand Up @@ -358,6 +359,7 @@ task global_phred_scores {
}
}

#@ except: EmptyOutputs
task check_fastq_and_rg_concordance {
meta {
description: "Validates FASTQs and read group records are concordant"
Expand Down
1 change: 1 addition & 0 deletions workflows/dnaseq/dnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ workflow dnaseq_standard_experimental {
}
}

#@ except: EmptyOutputs
task parse_input {
meta {
description: "Parses and validates the `dnaseq_standard` workflow's provided inputs"
Expand Down
6 changes: 6 additions & 0 deletions workflows/qc/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/).

## 2026 May

### Added

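- `quality_check_standard` workflow: optional FASTQ analysis via new input `run_fastq_analysis` (default `true`). Setting it to `false` skips BAM-to-FASTQ conversion and all FASTQ-level tools (fqlint, Kraken2, comparative Kraken2, fastp, librarian); it also disables `qualimap_rnaseq`, which requires the collated BAM produced by `bam_to_fastq` ([#315](https://github.com/stjudecloud/workflows/pull/315))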

## 2025 September

### Changed
Expand Down
199 changes: 104 additions & 95 deletions workflows/qc/quality-check-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ workflow quality_check_standard {
warning: "These files can be very large.",
}
use_all_cores: "Use all cores? Recommended for cloud environments."
run_fastq_analysis: {
description: "Create FASTQs from the input BAM and run FASTQ-level analyses?",
help: "If false, the pipeline skips SAMtools bam-to-fastq, fqlint, Kraken2, fastp, librarian, and comparative Kraken2. Also disables qualimap_rnaseq (requires a collated BAM from bam_to_fastq).",
}
optical_distance: {
description: "Maximum distance between read coordinates to consider them optical duplicates instead of library duplicates (e.g. PCR duplicates).",
help: "If `mark_duplicates == false`, this parameter is ignored. If `0`, then _optical_ duplicate marking is disabled and only traditional duplicate marking will be performed. Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Review the `mark_duplicates` task in `../../tools/picard.wdl` for more information.",
Expand Down Expand Up @@ -151,6 +155,7 @@ workflow quality_check_standard {
Boolean store_kraken_sequences = false
Boolean output_intermediate_files = false
Boolean use_all_cores = false
Boolean run_fastq_analysis = true
Int optical_distance = 0
Int subsample_n_reads = -1
}
Expand All @@ -164,7 +169,7 @@ workflow quality_check_standard {
call flag_filter.validate_flag_filter as kraken_filter_validator { input:
flags = standard_filter,
}
if (run_comparative_kraken) {
if (run_comparative_kraken && run_fastq_analysis) {
call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input:
flags = comparative_filter,
}
Expand Down Expand Up @@ -254,109 +259,111 @@ workflow quality_check_standard {
prefix = post_subsample_prefix,
}

call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input:
bam = post_subsample_bam,
bitwise_filter = standard_filter,
prefix = post_subsample_prefix,
# RNA needs a collated BAM for Qualimap
# DNA can skip the associated storage costs
retain_collated_bam = rna,
# disabling fast_mode enables writing of secondary and supplementary alignments
# to the collated BAM when processing RNA.
# Those alignments are used downstream by Qualimap.
fast_mode = (!rna),
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}

call fq.fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken after fqlint { input:
read_one_fastq_gz = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix,
use_all_cores,
}
if (run_fastp) {
call fp.fastp after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
output_fastq = false,
}
}
if (run_librarian) {
call libraran_tasks.librarian after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
}
}

if (run_comparative_kraken) {
call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator {
input:
if (run_fastq_analysis) {
call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input:
bam = post_subsample_bam,
bitwise_filter = comparative_filter,
prefix = post_subsample_prefix + ".alt_filtered",
# matches default but prevents user from overriding
# If the user wants a collated BAM, they should save the one
# from the first bam_to_fastq call.
retain_collated_bam = false,
# matches default but prevents user from overriding
# Since the only output here is FASTQs, we can enable fast mode.
# This discards secondary and supplementary alignments, which should not
# be converted to FASTQs. (Is that true?)
fast_mode = true,
bitwise_filter = standard_filter,
prefix = post_subsample_prefix,
# RNA needs a collated BAM for Qualimap
# DNA can skip the associated storage costs
retain_collated_bam = rna,
# disabling fast_mode enables writing of secondary and supplementary alignments
# to the collated BAM when processing RNA.
# Those alignments are used downstream by Qualimap.
fast_mode = (!rna),
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}
call fq.fqlint as alt_filtered_fqlint { input:

call fq.fqlint { input:
read_one_fastq = select_first([
alt_filtered_fastq.read_one_fastq_gz,
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
alt_filtered_fastq.read_two_fastq_gz,
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input:
call kraken2.kraken after fqlint { input:
read_one_fastq_gz = select_first([
alt_filtered_fastq.read_one_fastq_gz,
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
alt_filtered_fastq.read_two_fastq_gz,
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix + ".alt_filtered",
prefix = post_subsample_prefix,
use_all_cores,
}
if (run_fastp) {
call fp.fastp after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
output_fastq = false,
}
}
if (run_librarian) {
call libraran_tasks.librarian after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
}
}

if (run_comparative_kraken) {
call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator {
input:
bam = post_subsample_bam,
bitwise_filter = comparative_filter,
prefix = post_subsample_prefix + ".alt_filtered",
# matches default but prevents user from overriding
# If the user wants a collated BAM, they should save the one
# from the first bam_to_fastq call.
retain_collated_bam = false,
# matches default but prevents user from overriding
# Since the only output here is FASTQs, we can enable fast mode.
# This discards secondary and supplementary alignments, which should not
# be converted to FASTQs. (Is that true?)
fast_mode = true,
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}
call fq.fqlint as alt_filtered_fqlint { input:
read_one_fastq = select_first([
alt_filtered_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
alt_filtered_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input:
read_one_fastq_gz = select_first([
alt_filtered_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
alt_filtered_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix + ".alt_filtered",
use_all_cores,
}
}
}

call mosdepth.coverage as wg_coverage after quickcheck { input:
Expand Down Expand Up @@ -392,18 +399,20 @@ workflow quality_check_standard {
]),
outfile_name = post_subsample_prefix + ".strandedness.tsv",
}
call qualimap.rnaseq as qualimap_rnaseq { input:
bam = select_first([
bam_to_fastq.collated_bam,
"undefined",
]),
prefix = post_subsample_prefix + ".qualimap_rnaseq_results",
gtf = select_first([
gtf,
"undefined",
]),
name_sorted = true,
paired_end = true, # matches default but prevents user from overriding
if (run_fastq_analysis) {
call qualimap.rnaseq as qualimap_rnaseq { input:
bam = select_first([
bam_to_fastq.collated_bam,
"undefined",
]),
prefix = post_subsample_prefix + ".qualimap_rnaseq_results",
gtf = select_first([
gtf,
"undefined",
]),
name_sorted = true,
paired_end = true, # matches default but prevents user from overriding
}
}
}
if (mark_duplicates) {
Expand Down
1 change: 1 addition & 0 deletions workflows/rnaseq/rnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ workflow rnaseq_standard {
}
}

#@ except: EmptyOutputs
task parse_input {
meta {
description: "Parses and validates the `rnaseq_standard[_fastq]` workflows' provided inputs"
Expand Down
Loading