From 328566412a2d24983e57b2760270a5282684d687 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 3 Sep 2025 17:01:40 -0400 Subject: [PATCH 01/47] init demo --- sprocket-tests/custom/quickcheck.sh | 10 ++++ .../data_structures/flag_filter.toml | 49 +++++++++++++++++++ sprocket-tests/tools/picard.toml | 11 +++++ sprocket-tests/tools/samtools.toml | 31 ++++++++++++ 4 files changed, 101 insertions(+) create mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 sprocket-tests/data_structures/flag_filter.toml create mode 100644 sprocket-tests/tools/picard.toml create mode 100644 sprocket-tests/tools/samtools.toml diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh new file mode 100644 index 000000000..8c270cc8e --- /dev/null +++ b/sprocket-tests/custom/quickcheck.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +out_json=$1 + +out_bam=$(jq -r .bam "$out_json") + +samtools quickcheck "$out_bam" + diff --git a/sprocket-tests/data_structures/flag_filter.toml b/sprocket-tests/data_structures/flag_filter.toml new file mode 100644 index 000000000..549004e1f --- /dev/null +++ b/sprocket-tests/data_structures/flag_filter.toml @@ -0,0 +1,49 @@ +[[validate_string_is_12bit_int]] +name = "decimal_passes" # each test must have a unique identifier +[validate_string_is_12bit_int.inputs] +number = "5" +# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" + +[[validate_string_is_12bit_int]] +name = "hexadecimal_passes" +[validate_string_is_12bit_int.inputs] +number = "0x900" +[validate_string_is_12bit_int.tests] +stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs + +[[validate_string_is_12bit_int]] +name = "too_big_hexadecimal_fails" +[validate_string_is_12bit_int.inputs] +number = "0x1000" +[validate_string_is_12bit_int.tests] +exit_code = 42 # the task should fail for this test +stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test + +[[validate_string_is_12bit_int]] +name = "too_big_decimal_fails" +[validate_string_is_12bit_int.inputs] +number = "4096" +[validate_string_is_12bit_int.tests] +exit_code = 42 +stderr.contains = [ + "Input number (4096) interpreted as decimal", + "But number must be less than 4096!", +] # `contains` test can also be an array of strings + +[[validate_flag_filter]] # a workflow test +name = "valid_FlagFilter_passes" +[validate_flag_filter.inputs.flags] +include_if_all = "3" # decimal +exclude_if_any = "0xF04" # hexadecimal +include_if_any = "03" # octal +exclude_if_all = "4095" # decimal + +[[validate_flag_filter]] +name = "invalid_FlagFilter_fails" +[validate_flag_filter.inputs.flags] +include_if_all = "" # empty string +exclude_if_any = "this is not a number" +include_if_any = "000000000011" # binary interpreted as octal. 
Too many digits for octal +exclude_if_all = "4095" # this is fine +[validate_flag_filter.tests] +should_fail = true diff --git a/sprocket-tests/tools/picard.toml b/sprocket-tests/tools/picard.toml new file mode 100644 index 000000000..b1723646b --- /dev/null +++ b/sprocket-tests/tools/picard.toml @@ -0,0 +1,11 @@ +[[merge_sam_files]] +name = "Merge works" +[merge_sam_files.inputs] +bams = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", +] +prefix = "test.merged" +[merge_sam_files.tests] +custom = "quickcheck.sh" + diff --git a/sprocket-tests/tools/samtools.toml b/sprocket-tests/tools/samtools.toml new file mode 100644 index 000000000..070d9c0bc --- /dev/null +++ b/sprocket-tests/tools/samtools.toml @@ -0,0 +1,31 @@ +[[bam_to_fastq]] +name = "kitchen_sink" +[[bam_to_fastq.matrix]] +bam = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", + "$FIXTURES/test3.bam", +] +bam_index = [ + "$FIXTURES/test1.bam.bai", + "$FIXTURES/test2.bam.bai", + "$FIXTURES/test3.bam.bai", +] +[[bam_to_fastq.matrix]] +bitwise_filter = [ + { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, + { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, +] +[[bam_to_fastq.matrix]] +paired_end = [true, false] +[[bam_to_fastq.matrix]] +retain_collated_bam = [true, false] +[[bam_to_fastq.matrix]] +append_read_number = [true, false] +[[bam_to_fastq.matrix]] +output_singletons = [true, false] +[bam_to_fastq.inputs] +prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix +# this test is to ensure all the options (and combinations thereof) are valid +# so no tests beyond a `0` exit code are needed here + From 7bc1e3a9c85e19c47dfb1f87c78930fb9b8b367e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Sep 2025 08:16:55 -0400 Subject: [PATCH 02/47] revise: mv TOML out of tests dir and into main workspace --- .../data_structures => data_structures}/flag_filter.toml | 0 {sprocket-tests/tools => tools}/picard.toml | 0 {sprocket-tests/tools => tools}/samtools.toml | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {sprocket-tests/data_structures => data_structures}/flag_filter.toml (100%) rename {sprocket-tests/tools => tools}/picard.toml (100%) rename {sprocket-tests/tools => tools}/samtools.toml (100%) diff --git a/sprocket-tests/data_structures/flag_filter.toml b/data_structures/flag_filter.toml similarity index 100% rename from sprocket-tests/data_structures/flag_filter.toml rename to data_structures/flag_filter.toml diff --git a/sprocket-tests/tools/picard.toml b/tools/picard.toml similarity index 100% rename from sprocket-tests/tools/picard.toml rename to tools/picard.toml diff --git a/sprocket-tests/tools/samtools.toml b/tools/samtools.toml similarity index 100% rename from sprocket-tests/tools/samtools.toml rename to tools/samtools.toml From 6438780b5ab75c551365755080ea1567cdd4ef86 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 18 Nov 2025 09:18:08 -0500 Subject: [PATCH 03/47] switch from TOML to YAML --- data_structures/flag_filter.toml | 49 -------------------------------- data_structures/flag_filter.yaml | 43 ++++++++++++++++++++++++++++ tools/picard.toml | 11 ------- tools/picard.yaml | 9 ++++++ tools/samtools.toml | 31 -------------------- tools/samtools.yaml | 34 ++++++++++++++++++++++ 6 files changed, 86 insertions(+), 91 deletions(-) delete mode 100644 data_structures/flag_filter.toml create mode 100644 data_structures/flag_filter.yaml delete mode 100644 
tools/picard.toml create mode 100644 tools/picard.yaml delete mode 100644 tools/samtools.toml create mode 100644 tools/samtools.yaml diff --git a/data_structures/flag_filter.toml b/data_structures/flag_filter.toml deleted file mode 100644 index 549004e1f..000000000 --- a/data_structures/flag_filter.toml +++ /dev/null @@ -1,49 +0,0 @@ -[[validate_string_is_12bit_int]] -name = "decimal_passes" # each test must have a unique identifier -[validate_string_is_12bit_int.inputs] -number = "5" -# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" - -[[validate_string_is_12bit_int]] -name = "hexadecimal_passes" -[validate_string_is_12bit_int.inputs] -number = "0x900" -[validate_string_is_12bit_int.tests] -stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs - -[[validate_string_is_12bit_int]] -name = "too_big_hexadecimal_fails" -[validate_string_is_12bit_int.inputs] -number = "0x1000" -[validate_string_is_12bit_int.tests] -exit_code = 42 # the task should fail for this test -stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test - -[[validate_string_is_12bit_int]] -name = "too_big_decimal_fails" -[validate_string_is_12bit_int.inputs] -number = "4096" -[validate_string_is_12bit_int.tests] -exit_code = 42 -stderr.contains = [ - "Input number (4096) interpreted as decimal", - "But number must be less than 4096!", -] # `contains` test can also be an array of strings - -[[validate_flag_filter]] # a workflow test -name = "valid_FlagFilter_passes" -[validate_flag_filter.inputs.flags] -include_if_all = "3" # decimal -exclude_if_any = "0xF04" # hexadecimal -include_if_any = "03" # octal -exclude_if_all = "4095" # decimal - -[[validate_flag_filter]] -name = "invalid_FlagFilter_fails" -[validate_flag_filter.inputs.flags] -include_if_all = "" # empty string -exclude_if_any = "this is not a number" -include_if_any = "000000000011" # binary interpreted as octal. Too many digits for octal -exclude_if_all = "4095" # this is fine -[validate_flag_filter.tests] -should_fail = true diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml new file mode 100644 index 000000000..9888ec27b --- /dev/null +++ b/data_structures/flag_filter.yaml @@ -0,0 +1,43 @@ +validate_string_is_12bit_int: + - name: decimal_passes + inputs: + - number: "5" + - name: hexadecimal_passes + inputs: + - number: "0x900" + tests: + stdout: + contains: Input number (0x900) is valid + - name: too_big_hexadecimal_fails + inputs: + - number: "0x1000" + tests: + exit_code: 42 + stderr: + contains: Input number (0x1000) is invalid + - name: too_big_decimal_fails + inputs: + - number: "4096" + tests: + exit_code: 42 + stderr: + contains: + - Input number (4096) interpreted as decimal + - But number must be less than 4096! 
+validate_flag_filter: + - name: valid_FlagFilter_passes + inputs: + - flags: + include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "4095" + - name: invalid_FlagFilter_fails + inputs: + - flags: + include_if_all: "" + exclude_if_any: this is not a number + include_if_any: "000000000011" + exclude_if_all: "4095" + tests: + should_fail: true diff --git a/tools/picard.toml b/tools/picard.toml deleted file mode 100644 index b1723646b..000000000 --- a/tools/picard.toml +++ /dev/null @@ -1,11 +0,0 @@ -[[merge_sam_files]] -name = "Merge works" -[merge_sam_files.inputs] -bams = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", -] -prefix = "test.merged" -[merge_sam_files.tests] -custom = "quickcheck.sh" - diff --git a/tools/picard.yaml b/tools/picard.yaml new file mode 100644 index 000000000..075d6c794 --- /dev/null +++ b/tools/picard.yaml @@ -0,0 +1,9 @@ +merge_sam_files: + - name: Merge works + inputs: + - bams: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - prefix: test.merged + tests: + custom: quickcheck.sh diff --git a/tools/samtools.toml b/tools/samtools.toml deleted file mode 100644 index 070d9c0bc..000000000 --- a/tools/samtools.toml +++ /dev/null @@ -1,31 +0,0 @@ -[[bam_to_fastq]] -name = "kitchen_sink" -[[bam_to_fastq.matrix]] -bam = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", - "$FIXTURES/test3.bam", -] -bam_index = [ - "$FIXTURES/test1.bam.bai", - "$FIXTURES/test2.bam.bai", - "$FIXTURES/test3.bam.bai", -] -[[bam_to_fastq.matrix]] -bitwise_filter = [ - { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, - { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, -] -[[bam_to_fastq.matrix]] -paired_end = [true, false] -[[bam_to_fastq.matrix]] -retain_collated_bam = [true, false] -[[bam_to_fastq.matrix]] -append_read_number = [true, false] -[[bam_to_fastq.matrix]] -output_singletons = [true, false] -[bam_to_fastq.inputs] -prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix -# this test is to ensure all the options (and combinations thereof) are valid -# so no tests beyond a `0` exit code are needed here - diff --git a/tools/samtools.yaml b/tools/samtools.yaml new file mode 100644 index 000000000..0e09fe9f4 --- /dev/null +++ b/tools/samtools.yaml @@ -0,0 +1,34 @@ +bam_to_fastq: + - name: kitchen_sink + inputs: + - bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + - bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + - paired_end: + - true + - false + - retain_collated_bam: + - true + - false + - append_read_number: + - true + - false + - output_singletons: + - true + - false + - prefix: + - kitchen_sink_test From 62744f295be4186311f62b44606ec2ab2adc34fc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:54:00 -0500 Subject: [PATCH 04/47] WIP --- data_structures/flag_filter.yaml | 16 ++++++++++------ tools/picard.yaml | 6 +++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 9888ec27b..793edb8f4 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,23 +1,27 @@ 
validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: "5" + - number: + - "5" - name: hexadecimal_passes inputs: - - number: "0x900" + - number: + - "0x900" tests: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: "0x1000" + - number: + - "0x1000" tests: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: "4096" + - number: + - "4096" tests: exit_code: 42 stderr: @@ -28,14 +32,14 @@ validate_flag_filter: - name: valid_FlagFilter_passes inputs: - flags: - include_if_all: "3" + - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - flags: - include_if_all: "" + - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" diff --git a/tools/picard.yaml b/tools/picard.yaml index 075d6c794..502cd7ea1 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,8 +2,8 @@ merge_sam_files: - name: Merge works inputs: - bams: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - prefix: test.merged + - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - prefix: + - test.merged tests: custom: quickcheck.sh From a9cc715c65c4b8242f4e679e54bcc2cb10dc57e8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:57:20 -0500 Subject: [PATCH 05/47] fix: tests -> assertions --- data_structures/flag_filter.yaml | 6 +++--- tools/picard.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 793edb8f4..d503db985 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -14,7 +14,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x1000" - tests: + assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid @@ -22,7 +22,7 @@ validate_string_is_12bit_int: inputs: - number: - "4096" - tests: + assertions: exit_code: 42 stderr: contains: @@ -43,5 +43,5 @@ validate_flag_filter: exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" - tests: + assertions: should_fail: true diff --git a/tools/picard.yaml b/tools/picard.yaml index 502cd7ea1..a6a7be331 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -5,5 +5,5 @@ merge_sam_files: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - prefix: - test.merged - tests: + assertions: custom: quickcheck.sh From e09621cfa16f756c3f1dae9f9adf57b82588a726 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 16:09:23 -0500 Subject: [PATCH 06/47] Update flag_filter.yaml --- data_structures/flag_filter.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index d503db985..a9abf5b16 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -7,7 +7,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x900" - tests: + assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails From 0be911adff50fd00e911ae69b47b701c5551e49e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 29 Nov 2025 19:58:16 -0500 Subject: [PATCH 07/47] feat: use Peter's top level mapping representation --- data_structures/flag_filter.yaml | 20 +++---- data_structures/read_group.yml | 13 +++++ tools/picard.yaml | 4 +- tools/samtools.yaml | 89 ++++++++++++++++++++++---------- 4 files changed, 88 insertions(+), 38 
deletions(-) create mode 100644 data_structures/read_group.yml diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index a9abf5b16..1a667700a 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,27 +1,27 @@ validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: - - "5" + number: + - "5" - name: hexadecimal_passes inputs: - - number: - - "0x900" + number: + - "0x900" assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: - - "0x1000" + number: + - "0x1000" assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: - - "4096" + number: + - "4096" assertions: exit_code: 42 stderr: @@ -31,14 +31,14 @@ validate_string_is_12bit_int: validate_flag_filter: - name: valid_FlagFilter_passes inputs: - - flags: + flags: - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - - flags: + flags: - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml new file mode 100644 index 000000000..c92e5c2b0 --- /dev/null +++ b/data_structures/read_group.yml @@ -0,0 +1,13 @@ +# Note this file has the extension `.yml` while other tests end with `.yaml`. +# This is an intentional test that both extensions work. +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + contains: ID (id) must not match pattern diff --git a/tools/picard.yaml b/tools/picard.yaml index a6a7be331..f2b547bb4 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -1,9 +1,9 @@ merge_sam_files: - name: Merge works inputs: - - bams: + bams: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - - prefix: + prefix: - test.merged assertions: custom: quickcheck.sh diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 0e09fe9f4..8f6e5276f 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,34 +1,71 @@ bam_to_fastq: - name: kitchen_sink inputs: - - bam: + $files: + bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + paired_end: + - true + - false + retain_collated_bam: + - true + - false + append_read_number: + - true + - false + output_singletons: + - true + - false + prefix: + - kitchen_sink_test + - name: simpler + inputs: + output_singletons: + - true + - false + $files: + bam: - $FIXTURES/test1.bam - $FIXTURES/test2.bam - - $FIXTURES/test3.bam bam_index: - $FIXTURES/test1.bam.bai - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - - bitwise_filter: - - include_if_all: "0x0" - exclude_if_any: "0x900" - include_if_any: "0x0" - exclude_if_all: "0x0" - - include_if_all: "00" - exclude_if_any: "0x904" - include_if_any: "3" - exclude_if_all: "0" - - paired_end: - - true - - false - - retain_collated_bam: - - true - - false - - append_read_number: - - true - - false - - output_singletons: - - true - - false - - prefix: - - kitchen_sink_test + - name: not as simple + inputs: + output_singletons: + - true + - false + $files: + 
bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + $ref: + ref_fasta: + - hg19.fasta + - GRCh38.fasta + ref_fasta_index: + - hg19.fa.fai + - GRCh38.fa.fai + prefix: + - not_as_simple + From 374392a61921c0c688c087da0652b6f46cf3bfb8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Dec 2025 13:56:33 -0500 Subject: [PATCH 08/47] set up sprocket test infra --- sprocket-tests/custom/quickcheck.sh | 10 ---------- test/bin/quickcheck.sh | 0 .../fixtures}/1scattered.interval_list | 0 ...001_R01C01.beta_swan_norm_unfiltered.genomic.csv | 0 ...001_R03C01.beta_swan_norm_unfiltered.genomic.csv | 0 .../fixtures}/201533520001_R03C01_Grn.idat | Bin .../fixtures}/201533520001_R03C01_Red.idat | Bin .../fixtures}/Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../input => test/fixtures}/GRCh38.chr1_chr19.dict | 0 {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa | 0 .../fixtures}/GRCh38.chr1_chr19.fa.fai | 0 .../input => test/fixtures}/GRCh38.chr9_chr22.fa.gz | 0 .../fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.dict | 0 {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.fa.fai | 0 .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin {tests/input => test/fixtures}/README.md | 0 .../fixtures}/chr1_chr19.interval_list | 0 {tests/input => test/fixtures}/combined_beta.csv | 0 {tests/input => test/fixtures}/filtered_beta.csv | 0 {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv | 0 .../fixtures}/gencode.v31.chr9_chr22.gtf.gz | 0 .../fixtures}/gencode.v31.chrY_chrM.gene.bed | 0 .../fixtures}/gencode.v31.chrY_chrM.genelengths.txt | 0 .../fixtures}/gencode.v31.chrY_chrM.gtf.gz | 0 .../fixtures}/kraken2_C_elegans_library.tar.gz | 0 .../input => test/fixtures}/kraken2_db.mini.tar.gz | 0 .../input => test/fixtures}/kraken2_taxonomy.tar.gz | 0 {tests/input => test/fixtures}/random10k.r1.fq.gz | 0 {tests/input => test/fixtures}/random10k.r2.fq.gz | 0 .../fixtures}/star_db.chrY_chrM.tar.gz | 0 .../fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam | 0 {tests/input => test/fixtures}/test.bam | 0 {tests/input => test/fixtures}/test.bam.bai | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai | 0 .../test.bwa_aln_pe.chrY_chrM.feature-counts.txt | 0 .../test.bwa_aln_pe.chrY_chrM.readlength.txt | 0 {tests/input => test/fixtures}/test.extra_RG.bam | 0 {tests/input => test/fixtures}/test.fa | 0 {tests/input => test/fixtures}/test.tar.gz | 0 .../fixtures}/test.unaccounted_read.bam | 0 {tests/input => test/fixtures}/test1.vcf.gz | 0 {tests/input => test/fixtures}/test1.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test2.bam | 0 {tests/input => test/fixtures}/test2.vcf.gz | 0 {tests/input => test/fixtures}/test2.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test_R1.fq.gz | 0 {tests/input => test/fixtures}/test_R2.fq.gz | 0 .../input => test/fixtures}/test_rnaseq_variant.bam | 0 .../fixtures}/test_rnaseq_variant.bam.bai | 0 .../fixtures}/test_rnaseq_variant.recal.txt | 0 {tests/input => test/fixtures}/umap.csv | 0 .../wgs_calling_regions.hg38.interval_list | 0 60 files changed, 10 deletions(-) delete 
mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 test/bin/quickcheck.sh rename {tests/input => test/fixtures}/1scattered.interval_list (100%) rename {tests/input => test/fixtures}/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Grn.idat (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Red.idat (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa.fai (100%) rename {tests/input => test/fixtures}/GRCh38.chr9_chr22.fa.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa.fai (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/README.md (100%) rename {tests/input => test/fixtures}/chr1_chr19.interval_list (100%) rename {tests/input => test/fixtures}/combined_beta.csv (100%) rename {tests/input => test/fixtures}/filtered_beta.csv (100%) rename {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv (100%) rename {tests/input => test/fixtures}/gencode.v31.chr9_chr22.gtf.gz (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gene.bed (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gtf.gz (100%) rename {tests/input => test/fixtures}/kraken2_C_elegans_library.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_db.mini.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_taxonomy.tar.gz (100%) rename {tests/input => test/fixtures}/random10k.r1.fq.gz (100%) rename {tests/input => test/fixtures}/random10k.r2.fq.gz (100%) rename {tests/input => test/fixtures}/star_db.chrY_chrM.tar.gz (100%) rename {tests/input => test/fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename {tests/input => test/fixtures}/test.bam (100%) rename {tests/input => test/fixtures}/test.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.feature-counts.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.readlength.txt (100%) rename {tests/input => test/fixtures}/test.extra_RG.bam (100%) rename {tests/input => test/fixtures}/test.fa (100%) rename {tests/input => test/fixtures}/test.tar.gz (100%) rename {tests/input => test/fixtures}/test.unaccounted_read.bam (100%) rename {tests/input => test/fixtures}/test1.vcf.gz (100%) 
rename {tests/input => test/fixtures}/test1.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test2.bam (100%) rename {tests/input => test/fixtures}/test2.vcf.gz (100%) rename {tests/input => test/fixtures}/test2.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test_R1.fq.gz (100%) rename {tests/input => test/fixtures}/test_R2.fq.gz (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam.bai (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.recal.txt (100%) rename {tests/input => test/fixtures}/umap.csv (100%) rename {tests/input => test/fixtures}/wgs_calling_regions.hg38.interval_list (100%) diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh deleted file mode 100644 index 8c270cc8e..000000000 --- a/sprocket-tests/custom/quickcheck.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -out_json=$1 - -out_bam=$(jq -r .bam "$out_json") - -samtools quickcheck "$out_bam" - diff --git a/test/bin/quickcheck.sh b/test/bin/quickcheck.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/input/1scattered.interval_list b/test/fixtures/1scattered.interval_list similarity index 100% rename from tests/input/1scattered.interval_list rename to test/fixtures/1scattered.interval_list diff --git a/tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01_Grn.idat b/test/fixtures/201533520001_R03C01_Grn.idat similarity index 100% rename from tests/input/201533520001_R03C01_Grn.idat rename to test/fixtures/201533520001_R03C01_Grn.idat diff --git a/tests/input/201533520001_R03C01_Red.idat b/test/fixtures/201533520001_R03C01_Red.idat similarity index 100% rename from tests/input/201533520001_R03C01_Red.idat rename to test/fixtures/201533520001_R03C01_Red.idat diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/tests/input/GRCh38.chr1_chr19.dict b/test/fixtures/GRCh38.chr1_chr19.dict similarity index 100% rename from tests/input/GRCh38.chr1_chr19.dict rename to test/fixtures/GRCh38.chr1_chr19.dict diff --git a/tests/input/GRCh38.chr1_chr19.fa b/test/fixtures/GRCh38.chr1_chr19.fa similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa rename to test/fixtures/GRCh38.chr1_chr19.fa diff --git a/tests/input/GRCh38.chr1_chr19.fa.fai b/test/fixtures/GRCh38.chr1_chr19.fa.fai similarity index 100% 
rename from tests/input/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/GRCh38.chr1_chr19.fa.fai diff --git a/tests/input/GRCh38.chr9_chr22.fa.gz b/test/fixtures/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from tests/input/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/GRCh38.chr9_chr22.fa.gz diff --git a/tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/tests/input/GRCh38.chrY_chrM.dict b/test/fixtures/GRCh38.chrY_chrM.dict similarity index 100% rename from tests/input/GRCh38.chrY_chrM.dict rename to test/fixtures/GRCh38.chrY_chrM.dict diff --git a/tests/input/GRCh38.chrY_chrM.fa b/test/fixtures/GRCh38.chrY_chrM.fa similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa rename to test/fixtures/GRCh38.chrY_chrM.fa diff --git a/tests/input/GRCh38.chrY_chrM.fa.fai b/test/fixtures/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/GRCh38.chrY_chrM.fa.fai diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/tests/input/README.md b/test/fixtures/README.md similarity index 100% rename from tests/input/README.md rename to test/fixtures/README.md diff --git a/tests/input/chr1_chr19.interval_list b/test/fixtures/chr1_chr19.interval_list similarity index 100% rename from tests/input/chr1_chr19.interval_list rename to test/fixtures/chr1_chr19.interval_list diff --git a/tests/input/combined_beta.csv b/test/fixtures/combined_beta.csv similarity index 100% rename from tests/input/combined_beta.csv rename to test/fixtures/combined_beta.csv diff --git a/tests/input/filtered_beta.csv b/test/fixtures/filtered_beta.csv similarity index 100% rename from tests/input/filtered_beta.csv rename to test/fixtures/filtered_beta.csv diff --git a/tests/input/fusions.BCR_ABL1.tsv b/test/fixtures/fusions.BCR_ABL1.tsv similarity index 100% rename from tests/input/fusions.BCR_ABL1.tsv rename to test/fixtures/fusions.BCR_ABL1.tsv diff --git a/tests/input/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/gencode.v31.chr9_chr22.gtf.gz diff --git 
a/tests/input/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/gencode.v31.chrY_chrM.gene.bed diff --git a/tests/input/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/gencode.v31.chrY_chrM.genelengths.txt diff --git a/tests/input/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/gencode.v31.chrY_chrM.gtf.gz diff --git a/tests/input/kraken2_C_elegans_library.tar.gz b/test/fixtures/kraken2_C_elegans_library.tar.gz similarity index 100% rename from tests/input/kraken2_C_elegans_library.tar.gz rename to test/fixtures/kraken2_C_elegans_library.tar.gz diff --git a/tests/input/kraken2_db.mini.tar.gz b/test/fixtures/kraken2_db.mini.tar.gz similarity index 100% rename from tests/input/kraken2_db.mini.tar.gz rename to test/fixtures/kraken2_db.mini.tar.gz diff --git a/tests/input/kraken2_taxonomy.tar.gz b/test/fixtures/kraken2_taxonomy.tar.gz similarity index 100% rename from tests/input/kraken2_taxonomy.tar.gz rename to test/fixtures/kraken2_taxonomy.tar.gz diff --git a/tests/input/random10k.r1.fq.gz b/test/fixtures/random10k.r1.fq.gz similarity index 100% rename from tests/input/random10k.r1.fq.gz rename to test/fixtures/random10k.r1.fq.gz diff --git a/tests/input/random10k.r2.fq.gz b/test/fixtures/random10k.r2.fq.gz similarity index 100% rename from tests/input/random10k.r2.fq.gz rename to test/fixtures/random10k.r2.fq.gz diff --git a/tests/input/star_db.chrY_chrM.tar.gz b/test/fixtures/star_db.chrY_chrM.tar.gz similarity index 100% rename from tests/input/star_db.chrY_chrM.tar.gz rename to test/fixtures/star_db.chrY_chrM.tar.gz diff --git a/tests/input/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from tests/input/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/tests/input/test.bam b/test/fixtures/test.bam similarity index 100% rename from tests/input/test.bam rename to test/fixtures/test.bam diff --git a/tests/input/test.bam.bai b/test/fixtures/test.bam.bai similarity index 100% rename from tests/input/test.bam.bai rename to test/fixtures/test.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt diff --git 
a/tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt diff --git a/tests/input/test.extra_RG.bam b/test/fixtures/test.extra_RG.bam similarity index 100% rename from tests/input/test.extra_RG.bam rename to test/fixtures/test.extra_RG.bam diff --git a/tests/input/test.fa b/test/fixtures/test.fa similarity index 100% rename from tests/input/test.fa rename to test/fixtures/test.fa diff --git a/tests/input/test.tar.gz b/test/fixtures/test.tar.gz similarity index 100% rename from tests/input/test.tar.gz rename to test/fixtures/test.tar.gz diff --git a/tests/input/test.unaccounted_read.bam b/test/fixtures/test.unaccounted_read.bam similarity index 100% rename from tests/input/test.unaccounted_read.bam rename to test/fixtures/test.unaccounted_read.bam diff --git a/tests/input/test1.vcf.gz b/test/fixtures/test1.vcf.gz similarity index 100% rename from tests/input/test1.vcf.gz rename to test/fixtures/test1.vcf.gz diff --git a/tests/input/test1.vcf.gz.tbi b/test/fixtures/test1.vcf.gz.tbi similarity index 100% rename from tests/input/test1.vcf.gz.tbi rename to test/fixtures/test1.vcf.gz.tbi diff --git a/tests/input/test2.bam b/test/fixtures/test2.bam similarity index 100% rename from tests/input/test2.bam rename to test/fixtures/test2.bam diff --git a/tests/input/test2.vcf.gz b/test/fixtures/test2.vcf.gz similarity index 100% rename from tests/input/test2.vcf.gz rename to test/fixtures/test2.vcf.gz diff --git a/tests/input/test2.vcf.gz.tbi b/test/fixtures/test2.vcf.gz.tbi similarity index 100% rename from tests/input/test2.vcf.gz.tbi rename to test/fixtures/test2.vcf.gz.tbi diff --git a/tests/input/test_R1.fq.gz b/test/fixtures/test_R1.fq.gz similarity index 100% rename from tests/input/test_R1.fq.gz rename to test/fixtures/test_R1.fq.gz diff --git a/tests/input/test_R2.fq.gz b/test/fixtures/test_R2.fq.gz similarity index 100% rename from tests/input/test_R2.fq.gz rename to test/fixtures/test_R2.fq.gz diff --git a/tests/input/test_rnaseq_variant.bam b/test/fixtures/test_rnaseq_variant.bam similarity index 100% rename from tests/input/test_rnaseq_variant.bam rename to test/fixtures/test_rnaseq_variant.bam diff --git a/tests/input/test_rnaseq_variant.bam.bai b/test/fixtures/test_rnaseq_variant.bam.bai similarity index 100% rename from tests/input/test_rnaseq_variant.bam.bai rename to test/fixtures/test_rnaseq_variant.bam.bai diff --git a/tests/input/test_rnaseq_variant.recal.txt b/test/fixtures/test_rnaseq_variant.recal.txt similarity index 100% rename from tests/input/test_rnaseq_variant.recal.txt rename to test/fixtures/test_rnaseq_variant.recal.txt diff --git a/tests/input/umap.csv b/test/fixtures/umap.csv similarity index 100% rename from tests/input/umap.csv rename to test/fixtures/umap.csv diff --git a/tests/input/wgs_calling_regions.hg38.interval_list b/test/fixtures/wgs_calling_regions.hg38.interval_list similarity index 100% rename from tests/input/wgs_calling_regions.hg38.interval_list rename to test/fixtures/wgs_calling_regions.hg38.interval_list From d89fd65ae5e2de848b77e0f2aebc6385c17e9284 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:05 -0500 Subject: [PATCH 09/47] picard: lower merge mem for testing --- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 07324ebe2..33927919b 100755 --- 
a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 40 + Int memory_gb = 10 Int modify_disk_size_gb = 0 } From 81d9608417005ebdf8fda57c69b9ba3f8af56cd3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:19 -0500 Subject: [PATCH 10/47] point to new fixtures --- tools/picard.yaml | 2 +- tools/samtools.yaml | 50 +++++++++------------------------------------ 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/tools/picard.yaml b/tools/picard.yaml index f2b547bb4..7d031e5dd 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 8f6e5276f..2f75fdb52 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,15 +1,11 @@ bam_to_fastq: - name: kitchen_sink inputs: - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -38,34 +34,8 @@ bam_to_fastq: output_singletons: - true - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - name: not as simple - inputs: - output_singletons: - - true - - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - $ref: - ref_fasta: - - hg19.fasta - - GRCh38.fasta - ref_fasta_index: - - hg19.fa.fai - - GRCh38.fa.fai - prefix: - - not_as_simple - + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam From 5d97f28c6fb2cf7d8e1bd1b6fde83d5a5ac05d82 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 15 Dec 2025 07:36:47 -0500 Subject: [PATCH 11/47] Revert "picard: lower merge mem for testing" This reverts commit d89fd65ae5e2de848b77e0f2aebc6385c17e9284. 
--- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 33927919b..07324ebe2 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 10 + Int memory_gb = 40 Int modify_disk_size_gb = 0 } From 1b7235dd45737f977edabb1fc3aa1b66b3ebb9dd Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 27 Dec 2025 16:43:34 -0500 Subject: [PATCH 12/47] WIP --- data_structures/read_group.yml | 13 ------------- data_structures/{ => test}/flag_filter.yaml | 11 +++++------ data_structures/test/read_group.yaml | 11 +++++++++++ tools/{ => test}/picard.yaml | 0 tools/{ => test}/samtools.yaml | 2 -- 5 files changed, 16 insertions(+), 21 deletions(-) delete mode 100644 data_structures/read_group.yml rename data_structures/{ => test}/flag_filter.yaml (79%) create mode 100644 data_structures/test/read_group.yaml rename tools/{ => test}/picard.yaml (100%) rename tools/{ => test}/samtools.yaml (96%) diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml deleted file mode 100644 index c92e5c2b0..000000000 --- a/data_structures/read_group.yml +++ /dev/null @@ -1,13 +0,0 @@ -# Note this file has the extension `.yml` while other tests end with `.yaml`. -# This is an intentional test that both extensions work. -read_group_to_string: - - name: bad_id - inputs: - read_group: - - ID: id, - SM: sample_a - LB: library - assertions: - exit_code: 1 - stdout: - contains: ID (id) must not match pattern diff --git a/data_structures/flag_filter.yaml b/data_structures/test/flag_filter.yaml similarity index 79% rename from data_structures/flag_filter.yaml rename to data_structures/test/flag_filter.yaml index 1a667700a..4da14bcdd 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -8,8 +8,8 @@ validate_string_is_12bit_int: number: - "0x900" assertions: - stdout: - contains: Input number (0x900) is valid + stderr: + - Input number \(.*\) is valid - name: too_big_hexadecimal_fails inputs: number: @@ -17,7 +17,7 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: Input number (0x1000) is invalid + - Input number \(.*\) is invalid - name: too_big_decimal_fails inputs: number: @@ -25,9 +25,8 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: - - Input number (4096) interpreted as decimal - - But number must be less than 4096! + - Input number \(.*\) interpreted as decimal + - But number must be less than 4096! 
validate_flag_filter: - name: valid_FlagFilter_passes inputs: diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml new file mode 100644 index 000000000..a4367d8af --- /dev/null +++ b/data_structures/test/read_group.yaml @@ -0,0 +1,11 @@ +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + - ID (.*) must not match pattern diff --git a/tools/picard.yaml b/tools/test/picard.yaml similarity index 100% rename from tools/picard.yaml rename to tools/test/picard.yaml diff --git a/tools/samtools.yaml b/tools/test/samtools.yaml similarity index 96% rename from tools/samtools.yaml rename to tools/test/samtools.yaml index 2f75fdb52..feca44f2d 100644 --- a/tools/samtools.yaml +++ b/tools/test/samtools.yaml @@ -3,7 +3,6 @@ bam_to_fastq: inputs: bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: @@ -36,6 +35,5 @@ bam_to_fastq: - false bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam From 881fa7aa7a53eaf0c99062337d6244fe93f5d5c9 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:46:42 -0500 Subject: [PATCH 13/47] delete pytest folder --- .../input_json/get_read_groups.json | 3 - .../input_json/read_group_bad_id.json | 18 -- .../input_json/read_group_bad_sample.json | 18 -- .../input_json/read_group_good.json | 18 -- .../input_json/read_group_missing_sample.json | 8 - .../read_group_sample_with_space.json | 18 -- tests/data_structures/test_flag_filter.yaml | 74 -------- tests/data_structures/test_read_group.yaml | 51 ------ tests/tools/input_json/arriba.json | 9 - tests/tools/input_json/bwa_aln.json | 5 - tests/tools/input_json/bwa_aln_pe.json | 6 - tests/tools/input_json/bwa_mem.json | 5 - tests/tools/input_json/calc_tpm.json | 4 - tests/tools/input_json/gatk4_apply_bqsr.json | 5 - .../input_json/gatk4_base_recalibrator.json | 15 -- .../input_json/gatk4_haplotype_caller.json | 10 -- .../input_json/gatk4_split_n_cigar_reads.json | 8 - .../input_json/gatk4_variant_filtration.json | 7 - tests/tools/input_json/kraken_build_db.json | 6 - .../kraken_create_library_from_fastas.json | 5 - tests/tools/input_json/multiqc.json | 6 - tests/tools/input_json/multiqc_empty.json | 4 - .../tools/input_json/ngsderive_encoding.json | 6 - .../input_json/picard_merge_sam_files.json | 7 - tests/tools/input_json/picard_merge_vcfs.json | 11 -- tests/tools/input_json/sambamba_merge.json | 7 - tests/tools/input_json/samtools_merge.json | 7 - .../tools/input_json/samtools_subsample.json | 4 - .../input_json/star_alignment_PE_1_pair.json | 13 -- .../input_json/star_alignment_PE_2_pairs.json | 16 -- tests/tools/input_json/star_alignment_SE.json | 10 -- .../input_json/util_add_to_bam_header.json | 4 - tests/tools/input_json/util_download.json | 6 - tests/tools/input_json/util_split_string.json | 4 - tests/tools/test_arriba.yaml | 40 ----- tests/tools/test_bwa.yaml | 40 ----- tests/tools/test_deeptools.yaml | 9 - tests/tools/test_fastp.yaml | 12 -- tests/tools/test_fastqc.yaml | 10 -- tests/tools/test_fq.yaml | 27 --- tests/tools/test_gatk4.yaml | 65 ------- tests/tools/test_htseq.yaml | 19 -- tests/tools/test_kraken2.yaml | 55 ------ tests/tools/test_librarian.yaml | 10 -- tests/tools/test_md5sum.yaml | 11 -- tests/tools/test_mosdepth.yaml | 10 -- tests/tools/test_multiqc.yaml | 20 --- tests/tools/test_ngsderive.yaml | 68 -------- 
tests/tools/test_picard.yaml | 163 ------------------ tests/tools/test_qualimap.yaml | 11 -- tests/tools/test_sambamba.yaml | 51 ------ tests/tools/test_samtools.py | 53 ------ tests/tools/test_samtools.yaml | 125 -------------- tests/tools/test_star.yaml | 43 ----- tests/tools/test_util.yaml | 94 ---------- .../_test_methylation-preprocess.yaml | 15 -- tests/workflows/input_json/combine_data.json | 7 - .../input_json/dnaseq-standard-fastq.json | 18 -- .../workflows/input_json/dnaseq-standard.json | 4 - tests/workflows/input_json/filter_probes.json | 4 - tests/workflows/input_json/generate_umap.json | 3 - tests/workflows/input_json/plot_umap.json | 3 - .../input_json/process_raw_idats.json | 6 - tests/workflows/input_json/qc-standard.json | 10 -- .../input_json/rnaseq-standard-fastq.json | 21 --- .../workflows/input_json/rnaseq-standard.json | 6 - .../input_json/rnaseq-variant-calling.json | 16 -- tests/workflows/test_methylation-cohort.yaml | 40 ----- 68 files changed, 1487 deletions(-) delete mode 100644 tests/data_structures/input_json/get_read_groups.json delete mode 100644 tests/data_structures/input_json/read_group_bad_id.json delete mode 100644 tests/data_structures/input_json/read_group_bad_sample.json delete mode 100644 tests/data_structures/input_json/read_group_good.json delete mode 100644 tests/data_structures/input_json/read_group_missing_sample.json delete mode 100644 tests/data_structures/input_json/read_group_sample_with_space.json delete mode 100644 tests/data_structures/test_flag_filter.yaml delete mode 100644 tests/data_structures/test_read_group.yaml delete mode 100644 tests/tools/input_json/arriba.json delete mode 100644 tests/tools/input_json/bwa_aln.json delete mode 100644 tests/tools/input_json/bwa_aln_pe.json delete mode 100644 tests/tools/input_json/bwa_mem.json delete mode 100644 tests/tools/input_json/calc_tpm.json delete mode 100644 tests/tools/input_json/gatk4_apply_bqsr.json delete mode 100644 tests/tools/input_json/gatk4_base_recalibrator.json delete mode 100644 tests/tools/input_json/gatk4_haplotype_caller.json delete mode 100644 tests/tools/input_json/gatk4_split_n_cigar_reads.json delete mode 100644 tests/tools/input_json/gatk4_variant_filtration.json delete mode 100644 tests/tools/input_json/kraken_build_db.json delete mode 100644 tests/tools/input_json/kraken_create_library_from_fastas.json delete mode 100644 tests/tools/input_json/multiqc.json delete mode 100644 tests/tools/input_json/multiqc_empty.json delete mode 100644 tests/tools/input_json/ngsderive_encoding.json delete mode 100644 tests/tools/input_json/picard_merge_sam_files.json delete mode 100644 tests/tools/input_json/picard_merge_vcfs.json delete mode 100644 tests/tools/input_json/sambamba_merge.json delete mode 100644 tests/tools/input_json/samtools_merge.json delete mode 100644 tests/tools/input_json/samtools_subsample.json delete mode 100644 tests/tools/input_json/star_alignment_PE_1_pair.json delete mode 100644 tests/tools/input_json/star_alignment_PE_2_pairs.json delete mode 100644 tests/tools/input_json/star_alignment_SE.json delete mode 100644 tests/tools/input_json/util_add_to_bam_header.json delete mode 100644 tests/tools/input_json/util_download.json delete mode 100644 tests/tools/input_json/util_split_string.json delete mode 100644 tests/tools/test_arriba.yaml delete mode 100644 tests/tools/test_bwa.yaml delete mode 100644 tests/tools/test_deeptools.yaml delete mode 100644 tests/tools/test_fastp.yaml delete mode 100644 tests/tools/test_fastqc.yaml delete mode 100644 
tests/tools/test_fq.yaml delete mode 100644 tests/tools/test_gatk4.yaml delete mode 100644 tests/tools/test_htseq.yaml delete mode 100644 tests/tools/test_kraken2.yaml delete mode 100644 tests/tools/test_librarian.yaml delete mode 100644 tests/tools/test_md5sum.yaml delete mode 100644 tests/tools/test_mosdepth.yaml delete mode 100644 tests/tools/test_multiqc.yaml delete mode 100644 tests/tools/test_ngsderive.yaml delete mode 100644 tests/tools/test_picard.yaml delete mode 100644 tests/tools/test_qualimap.yaml delete mode 100644 tests/tools/test_sambamba.yaml delete mode 100644 tests/tools/test_samtools.py delete mode 100644 tests/tools/test_samtools.yaml delete mode 100644 tests/tools/test_star.yaml delete mode 100644 tests/tools/test_util.yaml delete mode 100644 tests/workflows/_test_methylation-preprocess.yaml delete mode 100644 tests/workflows/input_json/combine_data.json delete mode 100644 tests/workflows/input_json/dnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/dnaseq-standard.json delete mode 100644 tests/workflows/input_json/filter_probes.json delete mode 100644 tests/workflows/input_json/generate_umap.json delete mode 100644 tests/workflows/input_json/plot_umap.json delete mode 100644 tests/workflows/input_json/process_raw_idats.json delete mode 100644 tests/workflows/input_json/qc-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/rnaseq-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-variant-calling.json delete mode 100644 tests/workflows/test_methylation-cohort.yaml diff --git a/tests/data_structures/input_json/get_read_groups.json b/tests/data_structures/input_json/get_read_groups.json deleted file mode 100644 index 9b3f1bbc7..000000000 --- a/tests/data_structures/input_json/get_read_groups.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "get_read_groups.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam" -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_id.json b/tests/data_structures/input_json/read_group_bad_id.json deleted file mode 100644 index 41d05152c..000000000 --- a/tests/data_structures/input_json/read_group_bad_id.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id", - "SM": "sample_a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_sample.json b/tests/data_structures/input_json/read_group_bad_sample.json deleted file mode 100644 index 452d17b8d..000000000 --- a/tests/data_structures/input_json/read_group_bad_sample.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "sample1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_good.json b/tests/data_structures/input_json/read_group_good.json deleted file mode 100644 index 81753f65d..000000000 --- a/tests/data_structures/input_json/read_group_good.json +++ /dev/null @@ -1,18 
+0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "IPSC-1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_missing_sample.json b/tests/data_structures/input_json/read_group_missing_sample.json deleted file mode 100644 index 8a23be66a..000000000 --- a/tests/data_structures/input_json/read_group_missing_sample.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id" - }, - "read_group_to_string.required_fields": [ - "SM" - ] -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_sample_with_space.json b/tests/data_structures/input_json/read_group_sample_with_space.json deleted file mode 100644 index f0c7d88d0..000000000 --- a/tests/data_structures/input_json/read_group_sample_with_space.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group": { - "ID": "myID", - "SM": "sample a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/test_flag_filter.yaml b/tests/data_structures/test_flag_filter.yaml deleted file mode 100644 index 20a1fb4d5..000000000 --- a/tests/data_structures/test_flag_filter.yaml +++ /dev/null @@ -1,74 +0,0 @@ -- name: flag_filter_0x900 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x900" - -- name: flag_filter_5 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="5" - -- name: flag_filter_01 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="01" - -- name: flag_filter_0x1000 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x1000" - exit_code: 1 - stderr: - contains: - - "Input number (0x1000) is invalid" - -- name: flag_filter_neg1 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="-1" - exit_code: 1 - stderr: - contains: - - "Input number (-1) is invalid" - -- name: flag_filter_4096 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4096" - exit_code: 1 - stderr: - contains: - - "Input number (4096) interpreted as decimal" - - "But number must be less than 4096!" 
- -- name: flag_filter_4095 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4095" - -- name: flag_filter_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="string" - exit_code: 1 - stderr: - contains: - - "Input number (string) is invalid" - -- name: flag_filter_empty_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="" - exit_code: 1 - stderr: - contains: - - "Input number () is invalid" diff --git a/tests/data_structures/test_read_group.yaml b/tests/data_structures/test_read_group.yaml deleted file mode 100644 index 41d09e543..000000000 --- a/tests/data_structures/test_read_group.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: read_group_bad_id - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_id.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "ID (id) must not match pattern" - -- name: read_group_bad_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: read_group_good - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_good.json data_structures/read_group.wdl - -- name: read_group_missing_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_missing_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM is required" - -- name: read_group_sample_with_space - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_sample_with_space.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: get_read_groups - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t get_read_groups -i tests/data_structures/input_json/get_read_groups.json data_structures/read_group.wdl \ No newline at end of file diff --git a/tests/tools/input_json/arriba.json b/tests/tools/input_json/arriba.json deleted file mode 100644 index 0721bbd93..000000000 --- a/tests/tools/input_json/arriba.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "arriba.bam": "../../input/Aligned.sortedByCoord.chr9_chr22.bam", - "arriba.gtf": "../../input/gencode.v31.chr9_chr22.gtf.gz", - "arriba.reference_fasta_gz": "../../input/GRCh38.chr9_chr22.fa.gz", - "arriba.disable_filters": [ - "blacklist" - ], - "arriba.prefix": "fusions" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln.json b/tests/tools/input_json/bwa_aln.json deleted file mode 100644 index 3b59db584..000000000 --- a/tests/tools/input_json/bwa_aln.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_aln.fastq": "../../input/test_R1.fq.gz", - "bwa_aln.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln.bwa_db_tar_gz": 
"../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln_pe.json b/tests/tools/input_json/bwa_aln_pe.json deleted file mode 100644 index f878cb118..000000000 --- a/tests/tools/input_json/bwa_aln_pe.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "bwa_aln_pe.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_aln_pe.read_two_fastq_gz": "../../input/test_R2.fq.gz", - "bwa_aln_pe.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln_pe.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_mem.json b/tests/tools/input_json/bwa_mem.json deleted file mode 100644 index 4563e2b8b..000000000 --- a/tests/tools/input_json/bwa_mem.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_mem.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_mem.read_group": "@RG\\tID:test\\tSM:test", - "bwa_mem.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/calc_tpm.json b/tests/tools/input_json/calc_tpm.json deleted file mode 100644 index d4c9fc03f..000000000 --- a/tests/tools/input_json/calc_tpm.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "calc_tpm.counts": "../../input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt", - "calc_tpm.feature_lengths": "../../input/gencode.v31.chrY_chrM.genelengths.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_apply_bqsr.json b/tests/tools/input_json/gatk4_apply_bqsr.json deleted file mode 100644 index d73c2a3eb..000000000 --- a/tests/tools/input_json/gatk4_apply_bqsr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "apply_bqsr.bam": "../../input/test_rnaseq_variant.bam", - "apply_bqsr.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "apply_bqsr.recalibration_report": "../../input/test_rnaseq_variant.recal.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_base_recalibrator.json b/tests/tools/input_json/gatk4_base_recalibrator.json deleted file mode 100644 index e63e309b2..000000000 --- a/tests/tools/input_json/gatk4_base_recalibrator.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "base_recalibrator.bam": "../../input/test_rnaseq_variant.bam", - "base_recalibrator.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "base_recalibrator.fasta": "../../input/GRCh38.chr1_chr19.fa", - "base_recalibrator.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "base_recalibrator.dict": "../../input/GRCh38.chr1_chr19.dict", - "base_recalibrator.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "base_recalibrator.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "base_recalibrator.known_indels_sites_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "base_recalibrator.known_indels_sites_indices": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_haplotype_caller.json b/tests/tools/input_json/gatk4_haplotype_caller.json deleted file mode 100644 index 653fb5714..000000000 --- a/tests/tools/input_json/gatk4_haplotype_caller.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "haplotype_caller.bam": "../../input/test_rnaseq_variant.bam", - "haplotype_caller.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "haplotype_caller.fasta": "../../input/GRCh38.chr1_chr19.fa", - "haplotype_caller.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "haplotype_caller.dict": 
"../../input/GRCh38.chr1_chr19.dict", - "haplotype_caller.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "haplotype_caller.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "haplotype_caller.interval_list": "../../input/chr1_chr19.interval_list" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_split_n_cigar_reads.json b/tests/tools/input_json/gatk4_split_n_cigar_reads.json deleted file mode 100644 index e547714cf..000000000 --- a/tests/tools/input_json/gatk4_split_n_cigar_reads.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "split_n_cigar_reads.bam": "../../input/test.bam", - "split_n_cigar_reads.bam_index": "../../input/test.bam.bai", - "split_n_cigar_reads.fasta": "../../input/GRCh38.chr1_chr19.fa", - "split_n_cigar_reads.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "split_n_cigar_reads.dict": "../../input/GRCh38.chr1_chr19.dict", - "split_n_cigar_reads.prefix": "split" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_variant_filtration.json b/tests/tools/input_json/gatk4_variant_filtration.json deleted file mode 100644 index ba6d698da..000000000 --- a/tests/tools/input_json/gatk4_variant_filtration.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "variant_filtration.vcf": "../../input/test1.vcf.gz", - "variant_filtration.vcf_index": "../../input/test1.vcf.gz.tbi", - "variant_filtration.fasta": "../../input/GRCh38.chr1_chr19.fa", - "variant_filtration.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "variant_filtration.dict": "../../input/GRCh38.chr1_chr19.dict" -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_build_db.json b/tests/tools/input_json/kraken_build_db.json deleted file mode 100644 index d3251ef77..000000000 --- a/tests/tools/input_json/kraken_build_db.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "build_db.tarballs": [ - "../../input/kraken2_taxonomy.tar.gz", - "../../input/kraken2_C_elegans_library.tar.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_create_library_from_fastas.json b/tests/tools/input_json/kraken_create_library_from_fastas.json deleted file mode 100644 index 1d15046d6..000000000 --- a/tests/tools/input_json/kraken_create_library_from_fastas.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "create_library_from_fastas.fastas_gz": [ - "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc.json b/tests/tools/input_json/multiqc.json deleted file mode 100644 index 556d7111f..000000000 --- a/tests/tools/input_json/multiqc.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "multiqc.files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.readlength.txt" - ], - "multiqc.report_name": "test.bwa_aln_pe.chrY_chrM.multiqc" -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc_empty.json b/tests/tools/input_json/multiqc_empty.json deleted file mode 100644 index f2fbc2d6c..000000000 --- a/tests/tools/input_json/multiqc_empty.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "multiqc.files": [], - "multiqc.report_name": "empty" -} \ No newline at end of file diff --git a/tests/tools/input_json/ngsderive_encoding.json b/tests/tools/input_json/ngsderive_encoding.json deleted file mode 100644 index 3b4d25fd5..000000000 --- a/tests/tools/input_json/ngsderive_encoding.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "encoding.ngs_files": [ - 
"../../input/test.bwa_aln_pe.chrY_chrM.bam" - ], - "encoding.outfile_name": "test.bwa_aln_pe.chrY_chrM.encoding.tsv" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_sam_files.json b/tests/tools/input_json/picard_merge_sam_files.json deleted file mode 100644 index 59448d62d..000000000 --- a/tests/tools/input_json/picard_merge_sam_files.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge_sam_files.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge_sam_files.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_vcfs.json b/tests/tools/input_json/picard_merge_vcfs.json deleted file mode 100644 index 729faa020..000000000 --- a/tests/tools/input_json/picard_merge_vcfs.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "merge_vcfs.vcfs": [ - "../../input/test1.vcf.gz", - "../../input/test2.vcf.gz" - ], - "merge_vcfs.vcfs_indexes": [ - "../../input/test1.vcf.gz.tbi", - "../../input/test2.vcf.gz.tbi" - ], - "merge_vcfs.output_vcf_name": "test.vcf.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/sambamba_merge.json b/tests/tools/input_json/sambamba_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/sambamba_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_merge.json b/tests/tools/input_json/samtools_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/samtools_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_subsample.json b/tests/tools/input_json/samtools_subsample.json deleted file mode 100644 index 05e5fe4b5..000000000 --- a/tests/tools/input_json/samtools_subsample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "subsample.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam", - "subsample.desired_reads": 100 -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_1_pair.json b/tests/tools/input_json/star_alignment_PE_1_pair.json deleted file mode 100644 index befd948bd..000000000 --- a/tests/tools/input_json/star_alignment_PE_1_pair.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_2_pairs.json b/tests/tools/input_json/star_alignment_PE_2_pairs.json deleted file mode 100644 index 45e479445..000000000 --- a/tests/tools/input_json/star_alignment_PE_2_pairs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz", - "../../input/random10k.r1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz", - "../../input/random10k.r2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": 
"test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA", - "ID:random" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_SE.json b/tests/tools/input_json/star_alignment_SE.json deleted file mode 100644 index c30d29281..000000000 --- a/tests/tools/input_json/star_alignment_SE.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:foo" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_add_to_bam_header.json b/tests/tools/input_json/util_add_to_bam_header.json deleted file mode 100644 index 71f5e7960..000000000 --- a/tests/tools/input_json/util_add_to_bam_header.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "add_to_bam_header.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "add_to_bam_header.additional_header": "@RG\tID:3" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_download.json b/tests/tools/input_json/util_download.json deleted file mode 100644 index 16dde86b1..000000000 --- a/tests/tools/input_json/util_download.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "download.url": "https://raw.githubusercontent.com/stjudecloud/workflows/main/LICENSE.md", - "download.outfile_name": "license.txt", - "download.md5sum": "cf3575bd84ab3151c7e9700b5f1a9746", - "download.disk_size_gb": 1 -} \ No newline at end of file diff --git a/tests/tools/input_json/util_split_string.json b/tests/tools/input_json/util_split_string.json deleted file mode 100644 index 68e3d245b..000000000 --- a/tests/tools/input_json/util_split_string.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "split_string.string": "rg1 , rg2", - "split_string.delimiter": " , " -} \ No newline at end of file diff --git a/tests/tools/test_arriba.yaml b/tests/tools/test_arriba.yaml deleted file mode 100644 index 883a9a9c7..000000000 --- a/tests/tools/test_arriba.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: arriba - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba -i tests/tools/input_json/arriba.json tools/arriba.wdl - files: - - path: output/outputs.json - contains: - - fusions.tsv - - fusions.discarded.tsv - -- name: arriba_tsv_to_vcf - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_tsv_to_vcf tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" reference_fasta="tests/input/GRCh38.chr9_chr22.fa.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.vcf - -- name: arriba_extract_fusion_supporting_alignments - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_extract_fusion_supporting_alignments tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" bam="tests/input/Aligned.sortedByCoord.chr9_chr22.bam" bam_index="tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions_1.bam - -- name: arriba_annotate_exon_numbers - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_annotate_exon_numbers tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" gtf="tests/input/gencode.v31.chr9_chr22.gtf.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.annotated.tsv diff --git a/tests/tools/test_bwa.yaml b/tests/tools/test_bwa.yaml deleted file mode 100644 index 
6c38821ef..000000000 --- a/tests/tools/test_bwa.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: bwa_aln - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln -i tests/tools/input_json/bwa_aln.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_aln_pe - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln_pe -i tests/tools/input_json/bwa_aln_pe.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_mem - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_mem -i tests/tools/input_json/bwa_mem.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: build_bwa_db - tags: - - bwa - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_bwa_db tools/bwa.wdl reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - bwa_db.tar.gz diff --git a/tests/tools/test_deeptools.yaml b/tests/tools/test_deeptools.yaml deleted file mode 100644 index 64c671edf..000000000 --- a/tests/tools/test_deeptools.yaml +++ /dev/null @@ -1,9 +0,0 @@ -- name: deeptools_bam_coverage - tags: - - deeptools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_coverage tools/deeptools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bw diff --git a/tests/tools/test_fastp.yaml b/tests/tools/test_fastp.yaml deleted file mode 100644 index 006db1087..000000000 --- a/tests/tools/test_fastp.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- name: fastp - tags: - - fastp - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastp tools/fastp.wdl read_one_fastq="tests/input/test_R1.fq.gz" read_two_fastq="tests/input/test_R2.fq.gz" - files: - - path: output/outputs.json - contains: - - test.trimmed.R1.fastq.gz - - test.trimmed.R2.fastq.gz - - test.trimmed.fastp.html - - test.trimmed.fastp.json \ No newline at end of file diff --git a/tests/tools/test_fastqc.yaml b/tests/tools/test_fastqc.yaml deleted file mode 100644 index 2be254308..000000000 --- a/tests/tools/test_fastqc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: fastqc - tags: - - fastqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastqc tools/fastqc.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM_fastqc.zip - - test.bwa_aln_pe.chrY_chrM.fastqc_results.tar.gz diff --git a/tests/tools/test_fq.yaml b/tests/tools/test_fq.yaml deleted file mode 100644 index dae76f1bc..000000000 --- a/tests/tools/test_fq.yaml +++ /dev/null @@ -1,27 +0,0 @@ -- name: fqlint - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fqlint tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz - -- name: subsample_fq - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz record_count=1000 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz - -- name: subsample_fq_percentage - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl 
read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz probability=0.01 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz diff --git a/tests/tools/test_gatk4.yaml b/tests/tools/test_gatk4.yaml deleted file mode 100644 index 200a7f33e..000000000 --- a/tests/tools/test_gatk4.yaml +++ /dev/null @@ -1,65 +0,0 @@ -- name: gatk4_split_n_cigar_reads - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_n_cigar_reads -i tests/tools/input_json/gatk4_split_n_cigar_reads.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - split.bam - - split.bam.bai - - split.bam.md5 - -- name: gatk4_base_recalibrator - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t base_recalibrator -i tests/tools/input_json/gatk4_base_recalibrator.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.recal.txt - -- name: gatk4_apply_bqsr - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t apply_bqsr -i tests/tools/input_json/gatk4_apply_bqsr.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.bqsr.bam - - test_rnaseq_variant.bqsr.bam.bai - -- name: gatk4_haplotype_caller - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t haplotype_caller -i tests/tools/input_json/gatk4_haplotype_caller.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.vcf.gz - -- name: gatk4_variant_filtration - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t variant_filtration -i tests/tools/input_json/gatk4_variant_filtration.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test1.filtered.vcf.gz - - test1.filtered.vcf.gz.tbi - -- name: gatk4_mark_duplicates_spark - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates_spark tools/gatk4.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt \ No newline at end of file diff --git a/tests/tools/test_htseq.yaml b/tests/tools/test_htseq.yaml deleted file mode 100644 index f35b7adbd..000000000 --- a/tests/tools/test_htseq.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: htseq_count - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t count tools/htseq.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" strandedness="no" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.feature-counts.txt - -- name: calc_tpm - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_tpm -i tests/tools/input_json/calc_tpm.json tools/htseq.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/tools/test_kraken2.yaml b/tests/tools/test_kraken2.yaml deleted file mode 100644 index 22564eaf6..000000000 --- a/tests/tools/test_kraken2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -- name: download_taxonomy - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_taxonomy tools/kraken2.wdl - files: - - path: 
output/outputs.json - contains: - - kraken2_taxonomy.tar.gz - -- name: download_library - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_library tools/kraken2.wdl library_name='fungi' - files: - - path: output/outputs.json - contains: - - kraken2_fungi_library.tar.gz - -- name: create_library_from_fastas - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_library_from_fastas -i tests/tools/input_json/kraken_create_library_from_fastas.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_custom_library.tar.gz - -- name: build_db - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_db -i tests/tools/input_json/kraken_build_db.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_db.tar.gz - -- name: kraken - tags: - - kraken - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t kraken tools/kraken2.wdl read_one_fastq_gz="tests/input/test_R1.fq.gz" read_two_fastq_gz="tests/input/test_R2.fq.gz" db="tests/input/kraken2_db.mini.tar.gz" - files: - - path: output/outputs.json - contains: - - test.kraken2.txt diff --git a/tests/tools/test_librarian.yaml b/tests/tools/test_librarian.yaml deleted file mode 100644 index a0033514e..000000000 --- a/tests/tools/test_librarian.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: librarian - tags: - - librarian - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t librarian tools/librarian.wdl read_one_fastq="tests/input/test_R1.fq.gz" - files: - - path: output/outputs.json - contains: - - test.librarian.tar.gz - - librarian_heatmap.txt diff --git a/tests/tools/test_md5sum.yaml b/tests/tools/test_md5sum.yaml deleted file mode 100644 index a1199b897..000000000 --- a/tests/tools/test_md5sum.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: compute_checksum - tags: - - md5sum - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compute_checksum tools/md5sum.wdl file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.md5 - # contains: - # - "77fa2f59b0083202c73b0c80b60b24f6" diff --git a/tests/tools/test_mosdepth.yaml b/tests/tools/test_mosdepth.yaml deleted file mode 100644 index e0b33e21f..000000000 --- a/tests/tools/test_mosdepth.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: mosdepth_coverage - tags: - - mosdepth - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t coverage tools/mosdepth.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.mosdepth.summary.txt - - test.bwa_aln_pe.chrY_chrM.mosdepth.global.dist.txt diff --git a/tests/tools/test_multiqc.yaml b/tests/tools/test_multiqc.yaml deleted file mode 100644 index c2537df7b..000000000 --- a/tests/tools/test_multiqc.yaml +++ /dev/null @@ -1,20 +0,0 @@ -- name: multiqc - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc.json tools/multiqc.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.multiqc.html - -- name: multiqc_empty - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc_empty.json tools/multiqc.wdl - exit_code: 1 - stderr: - contains: 
- - "No analysis results found" - - "MultiQC didn't find any valid files" \ No newline at end of file diff --git a/tests/tools/test_ngsderive.yaml b/tests/tools/test_ngsderive.yaml deleted file mode 100644 index b94534d5e..000000000 --- a/tests/tools/test_ngsderive.yaml +++ /dev/null @@ -1,68 +0,0 @@ -- name: strandedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t strandedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.strandedness.tsv - - "Unstranded" - -- name: instrument - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t instrument tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.instrument.tsv - - "multiple instruments" - # - "unknown confidence" - -- name: read_length - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t read_length tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.readlength.tsv - # contains: - # - "150=20000" - -- name: encoding - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t encoding -i tests/tools/input_json/ngsderive_encoding.json tools/ngsderive.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.encoding.tsv - # contains: - # - "ASCII range: 74-74" - # - "Illumina 1.3" - -- name: junction_annotation - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t junction_annotation tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.junction_summary.tsv - - test.bwa_aln_pe.chrY_chrM.junctions.tsv.gz - -- name: endedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t endedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.endedness.tsv diff --git a/tests/tools/test_picard.yaml b/tests/tools/test_picard.yaml deleted file mode 100644 index ca99cab36..000000000 --- a/tests/tools/test_picard.yaml +++ /dev/null @@ -1,163 +0,0 @@ -- name: picard_mark_duplicates - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.md5 - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt - -- name: picard_validate_bam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_bam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.ValidateSamFile.txt - -- name: picard_bam_to_fastq - tags: - - picard - command: >- - 
./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: picard_sort - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_sort_queryname - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 sort_order="queryname" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - must_not_contain: - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_merge_sam_files - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_sam_files -i tests/tools/input_json/picard_merge_sam_files.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: picard_clean_sam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t clean_sam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.cleaned.bam - -- name: picard_collect_wgs_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_wgs_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectWgsMetrics.txt - -- name: picard_collect_alignment_summary_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_alignment_summary_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.pdf - -- name: picard_collect_gc_bias_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_gc_bias_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta=tests/input/GRCh38.chrY_chrM.fa - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.summary.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.pdf - -- name: picard_collect_insert_size_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_insert_size_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.pdf - -- name: picard_quality_score_distribution - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quality_score_distribution tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.txt - - 
test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.pdf - -- name: picard_merge_vcfs - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_vcfs -i tests/tools/input_json/picard_merge_vcfs.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.vcf.gz - -- name: picard_scatter_interval_list - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t scatter_interval_list tools/picard.wdl interval_list="tests/input/wgs_calling_regions.hg38.interval_list" scatter_count=3 - files: - - path: output/outputs.json - contains: - - 1scattered.interval_list - - 2scattered.interval_list - - 3scattered.interval_list - -- name: picard_create_sequence_dictionary - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_sequence_dictionary tools/picard.wdl fasta="tests/input/GRCh38.chrY_chrM.fa" outfile_name="GRCh38.chrY_chrM.dict" - files: - - path: output/outputs.json - contains: - - GRCh38.chrY_chrM.dict \ No newline at end of file diff --git a/tests/tools/test_qualimap.yaml b/tests/tools/test_qualimap.yaml deleted file mode 100644 index 405b31517..000000000 --- a/tests/tools/test_qualimap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: qualimap_rnaseq - tags: - - qualimap - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t rnaseq tools/qualimap.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.qualimap_rnaseq_results.tar.gz - - rnaseq_qc_results.txt - - coverage_profile_along_genes_(total).txt diff --git a/tests/tools/test_sambamba.yaml b/tests/tools/test_sambamba.yaml deleted file mode 100644 index 4a1464a2b..000000000 --- a/tests/tools/test_sambamba.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: sambamba_index - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: sambamba_merge - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/sambamba_merge.json tools/sambamba.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: sambamba_sort - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - -- name: sambamba_flagstat - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - -- name: sambamba_markdup - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t markdup tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.markdup.bam - - test.bwa_aln_pe.chrY_chrM.markdup.bam.bai - - test.bwa_aln_pe.chrY_chrM.markdup_log.txt \ No newline at end of file diff --git a/tests/tools/test_samtools.py b/tests/tools/test_samtools.py deleted file mode 100644 index 272ed7b76..000000000 --- a/tests/tools/test_samtools.py +++ /dev/null @@ -1,53 +0,0 @@ 
-"""TODO: rewrite this so it is runner agnostic""" - -# import pytest -# import pathlib -# from collections import OrderedDict - -# import pysam -# import fastq - - -# @pytest.mark.workflow('samtools_split') -# def test_samtools_split(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/0/test.1.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 1 -# assert read_groups[0] == "1" - -# second_bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/1/test.2.bam') -# second_samfile = pysam.AlignmentFile(second_bam, "rb") -# second_bam_header = OrderedDict((k, v) for k, v in second_samfile.header.items()) -# second_read_groups = [read_group['ID'] for read_group in second_bam_header.get('RG', []) if 'ID' in read_group] -# assert len(second_read_groups) == 1 -# assert second_read_groups[0] == "2" - -# @pytest.mark.workflow('samtools_merge') -# def test_samtools_merge(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/merged_bam/test.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 2 -# assert read_groups[0] == "test2" -# assert read_groups[1] == "test.bwa_aln_pe.chrY_chrM" - -# @pytest.mark.workflow('samtools_collate', 'samtools_collate_to_fastq') -# def test_samtools_collate(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/collated_bam/test.bwa_aln_pe.chrY_chrM.collated.bam') -# samfile = pysam.AlignmentFile(bam, "rb") - -# reads = list(samfile.fetch(until_eof=True)) -# for c in range(0, 100, 2): -# assert reads[c].query_name == reads[c+1].query_name -# assert reads[c].is_read1 != reads[c+1].is_read1 - -# @pytest.mark.workflow('samtools_bam_to_fastq', 'samtools_collate_to_fastq') -# def test_samtools_bam_to_fastq(workflow_dir): -# fq1 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_one_fastq_gz/test.bwa_aln_pe.chrY_chrM.R1.fastq.gz')) -# fq2 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_two_fastq_gz/test.bwa_aln_pe.chrY_chrM.R2.fastq.gz')) - -# for r1, r2 in zip(fq1, fq2): -# assert r1.head.removesuffix("/1") == r2.head.removesuffix("/2") diff --git a/tests/tools/test_samtools.yaml b/tests/tools/test_samtools.yaml deleted file mode 100644 index 4d08c8583..000000000 --- a/tests/tools/test_samtools.yaml +++ /dev/null @@ -1,125 +0,0 @@ -- name: samtools_quickcheck - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quickcheck tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -- name: samtools_split - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.bam" - files: - - path: output/outputs.json - contains: - - test.1.bam - - test.2.bam - -- name: samtools_split_unaccounted - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.unaccounted_read.bam" - exit_code: 1 - stderr: - contains: - - "There are reads present with bad or missing RG tags!" 
- -- name: samtools_split_extra_RG - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.extra_RG.bam" - exit_code: 1 - stderr: - contains: - - "No reads are in output BAM test.extra_RG.no_match.bam!" - - "This is likely caused by malformed RG records." - -- name: samtools_flagstat - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - # contains: - # - "20000" - # - "0 + 0 secondary" - # - "20000 + 0 in total (QC-passed reads + QC-failed reads)" - # - "10000 + 0 read1" - # - "10000 + 0 read2" - -- name: samtools_index - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: samtools_subsample - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample -i tests/tools/input_json/samtools_subsample.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.sampled.bam - -- name: samtools_merge - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/samtools_merge.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: samtools_addreplacerg - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t addreplacerg tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" read_group_id="test" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.addreplacerg.bam - -- name: samtools_collate - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collate tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - -- name: samtools_bam_to_fastq - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" retain_collated_bam=true - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: samtools_faidx - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t faidx tools/samtools.wdl fasta="tests/input/test.fa" - files: - - path: output/outputs.json - contains: - - test.fa.fai diff --git a/tests/tools/test_star.yaml b/tests/tools/test_star.yaml deleted file mode 100644 index 159f217e7..000000000 --- a/tests/tools/test_star.yaml +++ /dev/null @@ -1,43 +0,0 @@ -- name: build_star_db - tags: - - star - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_star_db tools/star.wdl reference_fasta=tests/input/GRCh38.chrY_chrM.fa gtf=tests/input/gencode.v31.chrY_chrM.gtf.gz - files: - - path: output/outputs.json - contains: - - star_db.tar.gz - -- name: star_alignment_PE_1_pair - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_1_pair.json tools/star.wdl - files: - - path: 
output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_PE_2_pairs - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_2_pairs.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_SE - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_SE.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam \ No newline at end of file diff --git a/tests/tools/test_util.yaml b/tests/tools/test_util.yaml deleted file mode 100644 index 695da9859..000000000 --- a/tests/tools/test_util.yaml +++ /dev/null @@ -1,94 +0,0 @@ -- name: download - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -i tests/tools/input_json/util_download.json -t download tools/util.wdl - files: - - path: output/outputs.json - contains: - - license.txt - # md5sum: cf3575bd84ab3151c7e9700b5f1a9746 - # contains: - # - "MIT License" - -- name: split_string - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_string -i tests/tools/input_json/util_split_string.json tools/util.wdl - stdout: - contains: - - "rg1" - - "rg2" - must_not_contain: - - "rg1 , rg2" - -- name: calc_feature_lengths - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_feature_lengths tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.genelengths.txt - # contains: - # - "AL954722.1" - -- name: compression_integrity - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compression_integrity tools/util.wdl bgzipped_file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -# TODO: This does not test that the record was properly added to the header. -- name: add_to_bam_header - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t add_to_bam_header -i tests/tools/input_json/util_add_to_bam_header.json tools/util.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.reheader.bam - -- name: unpack_tarball - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t unpack_tarball tools/util.wdl tarball="tests/input/test.tar.gz" - files: - - path: output/outputs.json - contains: - - test_file_a - - test_file_b - -- name: make_coverage_regions_bed - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t make_coverage_regions_bed tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" feature_type="exon" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.exon.bed - -- name: global_phred_scores - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t global_phred_scores tools/util.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.global_PHRED_scores.tsv - -- name: split_fastq - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_fastq tools/util.wdl fastq="tests/input/test_R1.fq.gz" prefix="test.R1." 
- files: - - path: output/outputs.json - contains: - - test.R1.000000.fastq.gz \ No newline at end of file diff --git a/tests/workflows/_test_methylation-preprocess.yaml b/tests/workflows/_test_methylation-preprocess.yaml deleted file mode 100644 index 3bfe379b8..000000000 --- a/tests/workflows/_test_methylation-preprocess.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: process_raw_idats - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t process_raw_idats -i tests/workflows/input_json/process_raw_idats.json workflows/methylation/methylation-preprocess.wdl - files: - - path: output/outputs.json - contains: - - 201533520001_R03C01.beta_swan_norm_unfiltered.csv - - 201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv - - 201533520001_R03C01.annotation.csv - - 201533520001_R03C01.beta.csv - - 201533520001_R03C01.cn_values.csv - - 201533520001_R03C01.m_values.csv - - 201533520001_R03C01.probeNames.csv \ No newline at end of file diff --git a/tests/workflows/input_json/combine_data.json b/tests/workflows/input_json/combine_data.json deleted file mode 100644 index ec1edd324..000000000 --- a/tests/workflows/input_json/combine_data.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "combine_data.files_to_combine": [ - "../../input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv", - "../../input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv" - ], - "combine_data.combined_file_name": "combined_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard-fastq.json b/tests/workflows/input_json/dnaseq-standard-fastq.json deleted file mode 100644 index f1db97b82..000000000 --- a/tests/workflows/input_json/dnaseq-standard-fastq.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "dnaseq_standard_fastq_experimental.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "dnaseq_standard_fastq_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard.json b/tests/workflows/input_json/dnaseq-standard.json deleted file mode 100644 index 06e8d8cac..000000000 --- a/tests/workflows/input_json/dnaseq-standard.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "dnaseq_standard_experimental.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "dnaseq_standard_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/filter_probes.json b/tests/workflows/input_json/filter_probes.json deleted file mode 100644 index 8b7d0a024..000000000 --- a/tests/workflows/input_json/filter_probes.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "filter_probes.beta_values": "../../input/combined_beta.csv", - "filter_probes.num_probes": 1000 -} \ No newline at end of file diff --git a/tests/workflows/input_json/generate_umap.json b/tests/workflows/input_json/generate_umap.json deleted file mode 100644 index 670c02298..000000000 --- a/tests/workflows/input_json/generate_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "generate_umap.filtered_beta_values": "../../input/filtered_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/plot_umap.json b/tests/workflows/input_json/plot_umap.json deleted file mode 100644 index 
86444c490..000000000 --- a/tests/workflows/input_json/plot_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "plot_umap.umap": "../../input/umap.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/process_raw_idats.json b/tests/workflows/input_json/process_raw_idats.json deleted file mode 100644 index b1dbbbc77..000000000 --- a/tests/workflows/input_json/process_raw_idats.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "process_raw_idats.idats": { - "left": "../../input/201533520001_R03C01_Grn.idat", - "right": "../../input/201533520001_R03C01_Red.idat" - } -} \ No newline at end of file diff --git a/tests/workflows/input_json/qc-standard.json b/tests/workflows/input_json/qc-standard.json deleted file mode 100644 index 0f552166e..000000000 --- a/tests/workflows/input_json/qc-standard.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "quality_check_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "quality_check_standard.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "quality_check_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "quality_check_standard.kraken_db": "../../input/kraken2_db.mini.tar.gz", - "quality_check_standard.rna": true, - "quality_check_standard.coverage_beds": [ - "../../input/gencode.v31.chrY_chrM.gene.bed" - ] -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard-fastq.json b/tests/workflows/input_json/rnaseq-standard-fastq.json deleted file mode 100644 index 452d4c932..000000000 --- a/tests/workflows/input_json/rnaseq-standard-fastq.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "rnaseq_standard_fastq.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "rnaseq_standard_fastq.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "rnaseq_standard_fastq.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "rnaseq_standard_fastq.prefix": "test", - "rnaseq_standard_fastq.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard_fastq.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard_fastq.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard.json b/tests/workflows/input_json/rnaseq-standard.json deleted file mode 100644 index a42cbc5a1..000000000 --- a/tests/workflows/input_json/rnaseq-standard.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "rnaseq_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-variant-calling.json b/tests/workflows/input_json/rnaseq-variant-calling.json deleted file mode 100644 index e108c5298..000000000 --- a/tests/workflows/input_json/rnaseq-variant-calling.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "rnaseq_variant_calling.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_variant_calling.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "rnaseq_variant_calling.fasta": "../../input/GRCh38.chrY_chrM.fa", - "rnaseq_variant_calling.fasta_index": "../../input/GRCh38.chrY_chrM.fa.fai", - "rnaseq_variant_calling.dict": "../../input/GRCh38.chrY_chrM.dict", - "rnaseq_variant_calling.calling_interval_list": "../../input/wgs_calling_regions.hg38.interval_list", - "rnaseq_variant_calling.known_vcfs": [ - 
"../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "rnaseq_variant_calling.known_vcf_indexes": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ], - "rnaseq_variant_calling.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "rnaseq_variant_calling.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx" -} \ No newline at end of file diff --git a/tests/workflows/test_methylation-cohort.yaml b/tests/workflows/test_methylation-cohort.yaml deleted file mode 100644 index f981b75a7..000000000 --- a/tests/workflows/test_methylation-cohort.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: combine_data - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t combine_data -i tests/workflows/input_json/combine_data.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - combined_beta.csv - -- name: filter_probes - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t filter_probes -i tests/workflows/input_json/filter_probes.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - filtered.beta.csv - - filtered.probes.csv - -- name: generate_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t generate_umap -i tests/workflows/input_json/generate_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.csv - -- name: plot_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t plot_umap -i tests/workflows/input_json/plot_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.png From d2898f06c9751ed306fba91bfe2e3fe3b6738059 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:05 -0500 Subject: [PATCH 14/47] revise: change validate_read_group logic --- data_structures/read_group.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 151f58ac8..719fdb409 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -156,15 +156,13 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean restrictive = true } # The SAM spec allows any printable ASCII character in header fields. String sam_spec_pattern = "[\\ -~]+" # We have the opinion that is too permissive for ID and SM. - String id_pattern = "id" - String sample_pattern = "sample.?" 
String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", @@ -174,11 +172,9 @@ task validate_read_group { command <<< exit_code=0 if ~{restrictive}; then - if [[ ~{read_group.ID} =~ ^~{id_pattern}$ ]] \ - || [[ ~{read_group.ID} =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.ID}" =~ ~{restrictive_pattern} ]] then - >&2 echo "ID (~{read_group.ID}) must not match patterns:" - >&2 echo "'~{id_pattern}' or '~{restrictive_pattern}'" + >&2 echo "ID must not contain spaces" exit_code=1 fi fi @@ -194,11 +190,9 @@ task validate_read_group { fi if ~{defined(read_group.SM)}; then if ~{restrictive}; then - if [[ "~{read_group.SM}" =~ ^~{sample_pattern}$ ]] \ - || [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] then - >&2 echo "SM must not match patterns:" - >&2 echo "'~{sample_pattern}' or '~{restrictive_pattern}'" + >&2 echo "SM must not contain spaces" exit_code=1 fi fi From 6c910ce58f812ce5e512ad4c61286d669aa539f3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:21 -0500 Subject: [PATCH 15/47] test data_structures --- data_structures/test/flag_filter.yaml | 22 ++++--- data_structures/test/read_group.yaml | 90 +++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index 4da14bcdd..fb6a55927 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -1,19 +1,23 @@ validate_string_is_12bit_int: - - name: decimal_passes + - name: valid_numbers inputs: number: - "5" - - name: hexadecimal_passes - inputs: - number: - "0x900" + - "01" + - "4095" assertions: stderr: - Input number \(.*\) is valid - - name: too_big_hexadecimal_fails + - name: invalid_numbers inputs: number: - "0x1000" + - "" + - "string" + - this is not a number + - "000000000011" + - "-1" assertions: exit_code: 42 stderr: @@ -38,9 +42,9 @@ validate_flag_filter: - name: invalid_FlagFilter_fails inputs: flags: - - include_if_all: "" - exclude_if_any: this is not a number - include_if_any: "000000000011" - exclude_if_all: "4095" + - include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "" # empty string should trigger a fail assertions: should_fail: true diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a4367d8af..a5435be74 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,91 @@ -read_group_to_string: - - name: bad_id +validate_read_group: + - name: valid_read_groups inputs: read_group: - - ID: id, + - ID: id + SM: sample + - ID: R2 + SM: sampleA + LB: spaces are allowed in LB + BC: barcode with a space + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + - name: id_with_spaces + inputs: + read_group: + - ID: ids should not have spaces # this is a problem SM: sample_a LB: library + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - ID must not contain spaces + - name: sample_with_spaces + inputs: + read_group: + - ID: R123 + SM: samples should not have spaces + LB: library can have spaces though + BC: 
barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - SM must not contain spaces + - name: spaces_allowed + inputs: + read_group: + - ID: technically permissible but gross + SM: same here! + restrictive: + - false + - name: missing_sample + inputs: + read_group: + - ID: R123 + LB: library assertions: exit_code: 1 - stdout: - - ID (.*) must not match pattern + stderr: + - SM is required + - name: missing_sample_allowed + inputs: + read_group: + - ID: R1 + LB: lib + required_fields: + - [] + +get_read_groups: + - name: works + inputs: + bam: + - test.bam From 64476ad2a7ed9412dd26ae52e1bde13ae8ba8bc5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 09:20:28 -0500 Subject: [PATCH 16/47] chore: log TODO --- data_structures/test/read_group.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a5435be74..d255bb66b 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -89,3 +89,5 @@ get_read_groups: inputs: bam: - test.bam + +# TODO: test read_group_to_string From c76f2016c009db0999dd8b835c4471c635782a12 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:10:33 -0500 Subject: [PATCH 17/47] more tests --- tools/test/arriba.yaml | 42 +++++++++++++++++++++++++++++++++++++++ tools/test/bwa.yaml | 35 ++++++++++++++++++++++++++++++++ tools/test/deeptools.yaml | 7 +++++++ 3 files changed, 84 insertions(+) create mode 100644 tools/test/arriba.yaml create mode 100644 tools/test/bwa.yaml create mode 100644 tools/test/deeptools.yaml diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml new file mode 100644 index 000000000..b274dd692 --- /dev/null +++ b/tools/test/arriba.yaml @@ -0,0 +1,42 @@ +arriba: + - name: works + inputs: + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + gtf: + - gencode.v31.chr9_chr22.gtf.gz + reference_fasta_gz: + - GRCh38.chr9_chr22.fa.gz + disable_filters: + - [ blacklist ] + prefix: + - fusions +arriba_tsv_to_vcf: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + reference_fasta: + - GRCh38.chr9_chr22.fa.gz + prefix: + - fusions +arriba_extract_fusion_supporting_alignments: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai + prefix: + - fusions +arriba_annotate_exon_numbers: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + gtf: + - gencode.v31.chr9_chr22.gtf.gz + prefix: + - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml new file mode 100644 index 000000000..55004379e --- /dev/null +++ b/tools/test/bwa.yaml @@ -0,0 +1,35 @@ +bwa_aln: + - name: works + inputs: + fastq: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_aln_pe: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_mem: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +build_bwa_db: + - name: works + tags: [ reference, slow ] + inputs: + reference_fasta: + - GRCh38.chrY_chrM.fa \ No
newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml new file mode 100644 index 000000000..5b71a1a26 --- /dev/null +++ b/tools/test/deeptools.yaml @@ -0,0 +1,7 @@ +bam_coverage: + - name: works + inputs: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file From 2fb9d234124ff6cbd3536a83defeede9a6dccc29 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:11:51 -0500 Subject: [PATCH 18/47] WIP: remove broken CI elements --- .github/workflows/build-and-test.yaml | 3 -- .github/workflows/pytest.yaml | 56 --------------------------- 2 files changed, 59 deletions(-) delete mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 6d042f359..04c5a79bc 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -7,6 +7,3 @@ jobs: call-build: uses: ./.github/workflows/docker-build.yaml secrets: inherit - call-test: - uses: ./.github/workflows/pytest.yaml - needs: call-build diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml deleted file mode 100644 index 64c0b310b..000000000 --- a/.github/workflows/pytest.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: pytest-workflow Check - -on: - workflow_call: - -jobs: - list-tags: - runs-on: ubuntu-latest - outputs: - tags: ${{ steps.set-tags.outputs.tags }} - steps: - - name: checkout - uses: actions/checkout@v4 - - name: set tags - id: set-tags - # remove the "reference" tag as it's redundant with other tags - run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \; | sort | uniq | grep -vE 'reference|slow' | jq -ncR '[inputs]')" >> $GITHUB_OUTPUT - pytest_check: - needs: list-tags - runs-on: ubuntu-latest - strategy: - matrix: - tag: ${{ fromJson(needs.list-tags.outputs.tags) }} - runner: [sprocket, miniwdl] - fail-fast: false - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Update Rust - if: matrix.runner == 'sprocket' - run: rustup update stable && rustup default stable - - name: Build Sprocket - if: matrix.runner == 'sprocket' - run: | - cargo install sprocket --locked - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install miniwdl and pytest-workflow - run: | - python -m pip install --upgrade pip - pip install -r requirements-ci.txt - - name: filter tests - # don't run slow tests in CI - run: | - find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("slow") ) )' {} \; - - name: Update containers - run: | - ./developer_scripts/update_container_tags.sh ${GITHUB_REF##*/} - - name: Run pytest-workflow - env: - RUNNER: ${{ matrix.runner }} - run: | - pytest --basetemp /home/runner/work/pytest --tag ${{ matrix.tag }} From d1e0d998d4b084a9de9002f147d3e6b219cbb08d Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:23:23 -0500 Subject: [PATCH 19/47] add test depth (more bams and fastqs tested) --- tools/test/arriba.yaml | 13 +++++++------ tools/test/bwa.yaml | 17 +++++++++++++---- tools/test/deeptools.yaml | 15 +++++++++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index b274dd692..e55e9e9d2 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -23,12 +23,13 @@ arriba_tsv_to_vcf: arriba_extract_fusion_supporting_alignments: - name: works inputs: - fusions: - - 
fusions.BCR_ABL1.tsv - bam: - - Aligned.sortedByCoord.chr9_chr22.bam - bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + $files: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 55004379e..6dc3b3d9e 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -3,6 +3,9 @@ bwa_aln: inputs: fastq: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -10,10 +13,13 @@ bwa_aln: bwa_aln_pe: - name: works inputs: - read_one_fastq_gz: - - test_R1.fq.gz - read_two_fastq_gz: - - test_R2.fq.gz + $files: + read_one_fastq_gz: + - test_R1.fq.gz + - random10k.r1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -23,6 +29,9 @@ bwa_mem: inputs: read_one_fastq_gz: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index 5b71a1a26..cba7fff3a 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -1,7 +1,14 @@ bam_coverage: - name: works inputs: - bam: - - test.bwa_aln_pe.chrY_chrM.bam - bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file + $files: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam + - test.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai + - Aligned.sortedByCoord.chr9_chr22.bam.bai + - test_rnaseq_variant.bam.bai + - test.bam.bai \ No newline at end of file From 51ad295b4907811c5461f3a537e6fc9cd0aac643 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:26:17 -0500 Subject: [PATCH 20/47] Update read_group.yaml --- data_structures/test/read_group.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d255bb66b..d91c53972 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,6 +88,9 @@ get_read_groups: - name: works inputs: bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam - test.bam # TODO: test read_group_to_string From 2b7d0ddedf9e9f1dae67eaaf5b98f15a78de4168 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:40:07 -0500 Subject: [PATCH 21/47] nesting for test fixtures --- data_structures/test/read_group.yaml | 8 ++--- .../Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../test.PE.2_RGs.Aligned.out.sorted.bam | 0 test/fixtures/{ => bams}/test.bam | 0 test/fixtures/{ => bams}/test.bam.bai | 0 .../{ => bams}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../test.bwa_aln_pe.chrY_chrM.bam.bai | 0 test/fixtures/{ => bams}/test.extra_RG.bam | 0 .../{ => bams}/test.unaccounted_read.bam | 0 test/fixtures/{ => bams}/test2.bam | 0 .../{ => bams}/test_rnaseq_variant.bam | 0 .../{ => bams}/test_rnaseq_variant.bam.bai | 0 test/fixtures/{ => fastqs}/random10k.r1.fq.gz | 0 test/fixtures/{ => fastqs}/random10k.r2.fq.gz | 0 test/fixtures/{ => fastqs}/test_R1.fq.gz | 0 test/fixtures/{ => fastqs}/test_R2.fq.gz | 0 .../{ => reference}/GRCh38.chr1_chr19.dict | 0 .../{ => reference}/GRCh38.chr1_chr19.fa | 0 .../{ => 
reference}/GRCh38.chr1_chr19.fa.fai | 0 .../{ => reference}/GRCh38.chr9_chr22.fa.gz | 0 .../GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../{ => reference}/GRCh38.chrY_chrM.dict | 0 .../{ => reference}/GRCh38.chrY_chrM.fa | 0 .../{ => reference}/GRCh38.chrY_chrM.fa.fai | 0 .../gencode.v31.chr9_chr22.gtf.gz | 0 .../gencode.v31.chrY_chrM.gene.bed | 0 .../gencode.v31.chrY_chrM.genelengths.txt | 0 .../gencode.v31.chrY_chrM.gtf.gz | 0 .../kraken2_C_elegans_library.tar.gz | 0 .../{ => reference}/kraken2_db.mini.tar.gz | 0 .../{ => reference}/kraken2_taxonomy.tar.gz | 0 .../{ => reference}/star_db.chrY_chrM.tar.gz | 0 test/fixtures/{ => reference}/test.fa | 0 ...mo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...apiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test1.vcf.gz | 0 test/fixtures/{ => vcfs}/test1.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test2.vcf.gz | 0 test/fixtures/{ => vcfs}/test2.vcf.gz.tbi | Bin tools/test/arriba.yaml | 14 ++++---- tools/test/bwa.yaml | 32 +++++++++--------- tools/test/deeptools.yaml | 16 ++++----- tools/test/picard.yaml | 2 +- tools/test/samtools.yaml | 12 +++---- 47 files changed, 42 insertions(+), 42 deletions(-) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename test/fixtures/{ => bams}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename test/fixtures/{ => bams}/test.bam (100%) rename test/fixtures/{ => bams}/test.bam.bai (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename test/fixtures/{ => bams}/test.extra_RG.bam (100%) rename test/fixtures/{ => bams}/test.unaccounted_read.bam (100%) rename test/fixtures/{ => bams}/test2.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam.bai (100%) rename test/fixtures/{ => fastqs}/random10k.r1.fq.gz (100%) rename test/fixtures/{ => fastqs}/random10k.r2.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R1.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R2.fq.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa.fai (100%) rename test/fixtures/{ => reference}/GRCh38.chr9_chr22.fa.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa.fai (100%) rename test/fixtures/{ => reference}/gencode.v31.chr9_chr22.gtf.gz (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gene.bed (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gtf.gz (100%) rename test/fixtures/{ => reference}/kraken2_C_elegans_library.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_db.mini.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_taxonomy.tar.gz (100%) rename test/fixtures/{ => reference}/star_db.chrY_chrM.tar.gz (100%) rename test/fixtures/{ => reference}/test.fa (100%) rename test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename 
test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz.tbi (100%) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d91c53972..b5257e9e0 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,9 +88,9 @@ get_read_groups: - name: works inputs: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam # TODO: test read_group_to_string diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/test/fixtures/test.bam b/test/fixtures/bams/test.bam similarity index 100% rename from test/fixtures/test.bam rename to test/fixtures/bams/test.bam diff --git a/test/fixtures/test.bam.bai b/test/fixtures/bams/test.bam.bai similarity index 100% rename from test/fixtures/test.bam.bai rename to test/fixtures/bams/test.bam.bai diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/test/fixtures/test.extra_RG.bam b/test/fixtures/bams/test.extra_RG.bam similarity index 100% rename from test/fixtures/test.extra_RG.bam rename to test/fixtures/bams/test.extra_RG.bam diff --git a/test/fixtures/test.unaccounted_read.bam b/test/fixtures/bams/test.unaccounted_read.bam similarity index 100% rename from test/fixtures/test.unaccounted_read.bam rename to test/fixtures/bams/test.unaccounted_read.bam diff --git a/test/fixtures/test2.bam b/test/fixtures/bams/test2.bam similarity index 100% rename from test/fixtures/test2.bam rename to test/fixtures/bams/test2.bam diff --git a/test/fixtures/test_rnaseq_variant.bam b/test/fixtures/bams/test_rnaseq_variant.bam similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam rename to test/fixtures/bams/test_rnaseq_variant.bam diff --git 
a/test/fixtures/test_rnaseq_variant.bam.bai b/test/fixtures/bams/test_rnaseq_variant.bam.bai similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam.bai rename to test/fixtures/bams/test_rnaseq_variant.bam.bai diff --git a/test/fixtures/random10k.r1.fq.gz b/test/fixtures/fastqs/random10k.r1.fq.gz similarity index 100% rename from test/fixtures/random10k.r1.fq.gz rename to test/fixtures/fastqs/random10k.r1.fq.gz diff --git a/test/fixtures/random10k.r2.fq.gz b/test/fixtures/fastqs/random10k.r2.fq.gz similarity index 100% rename from test/fixtures/random10k.r2.fq.gz rename to test/fixtures/fastqs/random10k.r2.fq.gz diff --git a/test/fixtures/test_R1.fq.gz b/test/fixtures/fastqs/test_R1.fq.gz similarity index 100% rename from test/fixtures/test_R1.fq.gz rename to test/fixtures/fastqs/test_R1.fq.gz diff --git a/test/fixtures/test_R2.fq.gz b/test/fixtures/fastqs/test_R2.fq.gz similarity index 100% rename from test/fixtures/test_R2.fq.gz rename to test/fixtures/fastqs/test_R2.fq.gz diff --git a/test/fixtures/GRCh38.chr1_chr19.dict b/test/fixtures/reference/GRCh38.chr1_chr19.dict similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.dict rename to test/fixtures/reference/GRCh38.chr1_chr19.dict diff --git a/test/fixtures/GRCh38.chr1_chr19.fa b/test/fixtures/reference/GRCh38.chr1_chr19.fa similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa rename to test/fixtures/reference/GRCh38.chr1_chr19.fa diff --git a/test/fixtures/GRCh38.chr1_chr19.fa.fai b/test/fixtures/reference/GRCh38.chr1_chr19.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/reference/GRCh38.chr1_chr19.fa.fai diff --git a/test/fixtures/GRCh38.chr9_chr22.fa.gz b/test/fixtures/reference/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from test/fixtures/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/reference/GRCh38.chr9_chr22.fa.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.dict b/test/fixtures/reference/GRCh38.chrY_chrM.dict similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.dict rename to test/fixtures/reference/GRCh38.chrY_chrM.dict diff --git a/test/fixtures/GRCh38.chrY_chrM.fa b/test/fixtures/reference/GRCh38.chrY_chrM.fa similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa rename to test/fixtures/reference/GRCh38.chrY_chrM.fa diff --git a/test/fixtures/GRCh38.chrY_chrM.fa.fai b/test/fixtures/reference/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/reference/GRCh38.chrY_chrM.fa.fai diff --git a/test/fixtures/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz diff --git a/test/fixtures/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed diff --git a/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from 
test/fixtures/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt diff --git a/test/fixtures/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz diff --git a/test/fixtures/kraken2_C_elegans_library.tar.gz b/test/fixtures/reference/kraken2_C_elegans_library.tar.gz similarity index 100% rename from test/fixtures/kraken2_C_elegans_library.tar.gz rename to test/fixtures/reference/kraken2_C_elegans_library.tar.gz diff --git a/test/fixtures/kraken2_db.mini.tar.gz b/test/fixtures/reference/kraken2_db.mini.tar.gz similarity index 100% rename from test/fixtures/kraken2_db.mini.tar.gz rename to test/fixtures/reference/kraken2_db.mini.tar.gz diff --git a/test/fixtures/kraken2_taxonomy.tar.gz b/test/fixtures/reference/kraken2_taxonomy.tar.gz similarity index 100% rename from test/fixtures/kraken2_taxonomy.tar.gz rename to test/fixtures/reference/kraken2_taxonomy.tar.gz diff --git a/test/fixtures/star_db.chrY_chrM.tar.gz b/test/fixtures/reference/star_db.chrY_chrM.tar.gz similarity index 100% rename from test/fixtures/star_db.chrY_chrM.tar.gz rename to test/fixtures/reference/star_db.chrY_chrM.tar.gz diff --git a/test/fixtures/test.fa b/test/fixtures/reference/test.fa similarity index 100% rename from test/fixtures/test.fa rename to test/fixtures/reference/test.fa diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/test/fixtures/test1.vcf.gz b/test/fixtures/vcfs/test1.vcf.gz similarity index 100% rename from test/fixtures/test1.vcf.gz rename to test/fixtures/vcfs/test1.vcf.gz diff --git a/test/fixtures/test1.vcf.gz.tbi b/test/fixtures/vcfs/test1.vcf.gz.tbi similarity index 100% rename from test/fixtures/test1.vcf.gz.tbi rename to test/fixtures/vcfs/test1.vcf.gz.tbi diff --git a/test/fixtures/test2.vcf.gz b/test/fixtures/vcfs/test2.vcf.gz similarity index 100% rename from test/fixtures/test2.vcf.gz rename to test/fixtures/vcfs/test2.vcf.gz diff --git a/test/fixtures/test2.vcf.gz.tbi b/test/fixtures/vcfs/test2.vcf.gz.tbi similarity index 100% rename from test/fixtures/test2.vcf.gz.tbi rename to test/fixtures/vcfs/test2.vcf.gz.tbi diff --git 
a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e55e9e9d2..e96b4d8ce 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -2,11 +2,11 @@ arriba: - name: works inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz reference_fasta_gz: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz disable_filters: - [ blacklist ] prefix: @@ -17,7 +17,7 @@ arriba_tsv_to_vcf: fusions: - fusions.BCR_ABL1.tsv reference_fasta: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz prefix: - fusions arriba_extract_fusion_supporting_alignments: @@ -27,9 +27,9 @@ arriba_extract_fusion_supporting_alignments: fusions: - fusions.BCR_ABL1.tsv bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: @@ -38,6 +38,6 @@ arriba_annotate_exon_numbers: fusions: - fusions.BCR_ABL1.tsv gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz prefix: - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 6dc3b3d9e..bc05f3524 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -2,43 +2,43 @@ bwa_aln: - name: works inputs: fastq: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_aln_pe: - name: works inputs: $files: read_one_fastq_gz: - - test_R1.fq.gz - - random10k.r1.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz read_two_fastq_gz: - - test_R2.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_mem: - name: works inputs: read_one_fastq_gz: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz build_bwa_db: - name: works tags: [ reference, slow ] inputs: reference_fasta: - - GRCh38.chrY_chrM.fa \ No newline at end of file + - reference/GRCh38.chrY_chrM.fa \ No newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index cba7fff3a..e44caf285 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -3,12 +3,12 @@ bam_coverage: inputs: $files: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai - - Aligned.sortedByCoord.chr9_chr22.bam.bai - - test_rnaseq_variant.bam.bai - - test.bam.bai \ No newline at end of file + - bams/test.bwa_aln_pe.chrY_chrM.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/test_rnaseq_variant.bam.bai + - bams/test.bam.bai \ 
No newline at end of file diff --git a/tools/test/picard.yaml b/tools/test/picard.yaml index 7d031e5dd..1c9f80721 100644 --- a/tools/test/picard.yaml +++ b/tools/test/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] + - [bams/test.bwa_aln_pe.chrY_chrM.bam, bams/test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/test/samtools.yaml b/tools/test/samtools.yaml index feca44f2d..52a5de1b1 100644 --- a/tools/test/samtools.yaml +++ b/tools/test/samtools.yaml @@ -2,9 +2,9 @@ bam_to_fastq: - name: kitchen_sink inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -34,6 +34,6 @@ bam_to_fastq: - true - false bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam From b3b9c2702a1f3b3b470cc078ac931f3b599d0236 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:29 -0500 Subject: [PATCH 22/47] deprecate fastqc --- tools/fastqc.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index a79de80b8..cf6fe238c 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -5,6 +5,8 @@ version 1.1 task fastqc { meta { description: "Generates a FastQC quality control metrics report for the input BAM file" + warning: "**[DEPRECATED]** We prefer the analysis provided by `fastp` which computes similar metrics but is faster and more robust. Please see the `fastp` task in `fastp.wdl` instead of using FastQC!" + deprecated: true outputs: { raw_data: "A zip archive of raw FastQC data. 
Can be parsed by MultiQC.", results: "A gzipped tar archive of all FastQC output files", From 038909d57b539665a829f7c97756886483b31bde Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:38 -0500 Subject: [PATCH 23/47] log TODO --- tools/test/arriba.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e96b4d8ce..8f51f6456 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -1,3 +1,4 @@ +# TODO: test advanced options arriba: - name: works inputs: From 77271b361496d1cf2bc137e8910acd17ec902432 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:28:04 -0500 Subject: [PATCH 24/47] tests for fastp and fq --- tools/test/fastp.yaml | 11 +++++++++ tools/test/fq.yaml | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/test/fastp.yaml create mode 100644 tools/test/fq.yaml diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml new file mode 100644 index 000000000..851d0ce33 --- /dev/null +++ b/tools/test/fastp.yaml @@ -0,0 +1,11 @@ +# TODO: test advanced options +fastp: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml new file mode 100644 index 000000000..9f5fc6095 --- /dev/null +++ b/tools/test/fq.yaml @@ -0,0 +1,56 @@ +# TODO: add lint tests for malformed fastqs +fqlint: + - name: valid_fastqs + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz +subsample: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + $sampling_controls: + record_count: + - 1000 + - 500 + - -1 # negative should disable + - 0 # so should zero + probability: + - 0.0 # 0 should disable + - 1.0 # 1 should also disable + - 0.5 + - 0.25 + - name: conflicting_args + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + record_count: + - -1 + - 0 + probability: + - 0.0 + - 1.0 + assertions: + exit_code: 2 + - name: neither_count_nor_probability_specified + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + assertions: + exit_code: 2 From 5ed086613d0dd5eae4687c61fb32a8fc6843b420 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:58:11 -0500 Subject: [PATCH 25/47] mv reference vcfs to right dir --- .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf 
b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi From 8a186b64a95f38074dd51b518aefcee1e27b70bc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 11:34:13 -0500 Subject: [PATCH 26/47] Create gatk4.yaml --- tools/test/gatk4.yaml | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tools/test/gatk4.yaml diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml new file mode 100644 index 000000000..73bd4fe9d --- /dev/null +++ b/tools/test/gatk4.yaml @@ -0,0 +1,100 @@ +apply_bqsr: + - name: works + inputs: + $files: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + recalibration_report: + - test_rnaseq_variant.recal.txt +base_recalibrator: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + $known_indels: + known_indels_sites_vcfs: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz ] + known_indels_sites_indices: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi ] +haplotype_caller: + - name: works + tags: [ slow ] + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + interval_list: + - chr1_chr19.interval_list +split_n_cigar_reads: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + prefix: + - split +variant_filtration: + - name: works + inputs: + 
$sample: + vcf: + - vcfs/test1.vcf.gz + - vcfs/test2.vcf.gz + vcf_index: + - vcfs/test1.vcf.gz.tbi + - vcfs/test2.vcf.gz.tbi + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict +mark_duplicates_spark: + - name: works + inputs: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam \ No newline at end of file From 52da94cb055553ea3e6f0e821a682e6ddcba75c5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 14:31:25 -0500 Subject: [PATCH 27/47] WIP --- .gitignore | 1 + tools/htseq.wdl | 15 +++------------ tools/test/gatk4.yaml | 1 + tools/test/htseq.yaml | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 12 deletions(-) create mode 100644 tools/test/htseq.yaml diff --git a/.gitignore b/.gitignore index c1ff16be3..2bab65b34 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +pytest/ # Ignore common bioinformatics formats used in these workflows. # But only if they are in the root of this repo /*.fastq.gz diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 95d604e02..bcaca9ce0 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -34,15 +34,6 @@ task count { description: "GFF attribute to be used as feature ID", group: "Common", } - mode: { - description: "Mode to handle reads overlapping more than one feature. `union` is recommended for most use-cases.", - external_help: "https://htseq.readthedocs.io/en/latest/htseqcount.html#htseq-count-counting-reads-within-features", - choices: [ - "union", - "intersection-strict", - "intersection-nonempty", - ], - } include_custom_header: { description: "Include a custom header for the output file? If true, the first line of the output file will be `~{idattr}\t~{prefix}`.", warning: "This is not an official feature of HTSeq. 
This may break downstream tools that expect the typical headerless HTSeq output format.", @@ -80,7 +71,6 @@ task count { String prefix = basename(bam, ".bam") String feature_type = "exon" String idattr = "gene_name" - String mode = "union" Boolean include_custom_header = true Boolean pos_sorted = false Boolean nonunique = false @@ -93,6 +83,9 @@ task count { String outfile_name = prefix + ".feature-counts.txt" + # the docs recommend this for most use cases, so we hardcode it + String mode = "union" + Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") @@ -107,8 +100,6 @@ if ~{include_custom_header}; then echo -e "~{idattr}\t~{prefix}" > "~{outfile_name}" - else - true > "~{outfile_name}" # ensure file is empty fi # 9223372036854776000 == max 64 bit Float diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml index 73bd4fe9d..bfb256ae6 100644 --- a/tools/test/gatk4.yaml +++ b/tools/test/gatk4.yaml @@ -1,3 +1,4 @@ +# TODO: advanced options apply_bqsr: - name: works inputs: diff --git a/tools/test/htseq.yaml b/tools/test/htseq.yaml new file mode 100644 index 000000000..1a2356883 --- /dev/null +++ b/tools/test/htseq.yaml @@ -0,0 +1,37 @@ +count: + - name: kitchen_sink + tags: [ slow ] + inputs: + $files: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + gtf: + - reference/gencode.v31.chrY_chrM.gtf.gz + pos_sorted: + - false + strandedness: + - "yes" + - "no" + - reverse + include_custom_header: + - true + - false + nonunique: + - true + - false + secondary_alignments: + - true + - false + supplementary_alignments: + - true + - false + minaqual: + - 0 + - 10 +calc_tpm: + - name: works + inputs: + counts: + - test.bwa_aln_pe.chrY_chrM.feature-counts.txt + feature_lengths: + - reference/gencode.v31.chrY_chrM.genelengths.txt \ No newline at end of file From 8658e212da835e8437c332eb1a3e2f987152bec6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 3 Jan 2026 11:28:20 -0500 Subject: [PATCH 28/47] Update flag_filter.yaml --- data_structures/test/flag_filter.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index fb6a55927..cd64d0cca 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -6,6 +6,8 @@ validate_string_is_12bit_int: - "0x900" - "01" - "4095" + - "0" + - "072" assertions: stderr: - Input number \(.*\) is valid @@ -14,10 +16,11 @@ validate_string_is_12bit_int: number: - "0x1000" - "" - - "string" + - string - this is not a number - "000000000011" - "-1" + - "08" assertions: exit_code: 42 stderr: @@ -26,6 +29,7 @@ validate_string_is_12bit_int: inputs: number: - "4096" + - "9999" assertions: exit_code: 42 stderr: From 275865919980ef24c91b8b7239e5de66dc29df Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 4 Jan 2026 11:41:04 -0500 Subject: [PATCH 29/47] fix: SE works in addition to PE --- tools/fq.wdl | 2 +- tools/test/bwa.yaml | 16 +++++++++++----- tools/test/fastp.yaml | 6 +++++- tools/test/fq.yaml | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/fq.wdl b/tools/fq.wdl index 55148a99d..e5eab4763 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -154,7 +154,7 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{"--r2-dst '" + r2_dst + "'"} \ + ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 
bc05f3524..f5e1d7c15 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -27,11 +27,17 @@ bwa_aln_pe: bwa_mem: - name: works inputs: - read_one_fastq_gz: - - fastqs/test_R1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/random10k.r2.fq.gz + $samples: + read_one_fastq_gz: + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq_gz: + - fastqs/test_R2.fq.gz + - null + - fastqs/random10k.r2.fq.gz + - null read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 851d0ce33..272624876 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -6,6 +6,10 @@ fastp: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz \ No newline at end of file + - fastqs/random10k.r2.fq.gz + - null + - null \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml index 9f5fc6095..c8c48084a 100644 --- a/tools/test/fq.yaml +++ b/tools/test/fq.yaml @@ -6,9 +6,13 @@ fqlint: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null subsample: - name: works inputs: @@ -16,9 +20,13 @@ subsample: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null $sampling_controls: record_count: - 1000 From 108983536cfe8e6ab678eb226516fd78b0388dd6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 27 Jan 2026 11:18:46 -0500 Subject: [PATCH 30/47] chore: don't specify defaults in two (disagreeing) places --- data_structures/read_group.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 719fdb409..e0e75d715 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -83,7 +83,7 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -156,8 +156,8 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = ["SM"] - Boolean restrictive = true + Array[String] required_fields + Boolean restrictive } # The SAM spec allows any printable ASCII character in header fields. 
@@ -374,7 +374,7 @@ task inner_read_group_to_string { input { ReadGroup read_group - Boolean format_as_sam_record = false + Boolean format_as_sam_record } String delimiter = if format_as_sam_record then "\\t" else " " From 5b690422c77bad542d0ab60b7ac7ca5e550e1bdb Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 28 Jan 2026 12:46:35 -0500 Subject: [PATCH 31/47] add a few (experimental) output assertions --- data_structures/test/read_group.yaml | 29 +++++----- tools/test/fastp.yaml | 79 +++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index b5257e9e0..12ef8e5ad 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,11 @@ -validate_read_group: +read_group_to_string: - name: valid_read_groups inputs: read_group: - - ID: id - SM: sample - - ID: R2 - SM: sampleA + - ID: R1 + SM: sampleFoo + - ID: R1 + SM: sampleFoo LB: spaces are allowed in LB BC: barcode with a space PU: platform_unit @@ -18,6 +18,11 @@ validate_read_group: PM: platform_model FO: ACMG KS: key_sequence + assertions: + outputs: + validated_read_group: + - Contains: R1 + - Contains: sampleFoo - name: id_with_spaces inputs: read_group: @@ -36,9 +41,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - ID must not contain spaces + should_fail: true - name: sample_with_spaces inputs: read_group: @@ -57,9 +60,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - SM must not contain spaces + should_fail: true - name: spaces_allowed inputs: read_group: @@ -73,9 +74,7 @@ validate_read_group: - ID: R123 LB: library assertions: - exit_code: 1 - stderr: - - SM is required + should_fail: true - name: missing_sample_allowed inputs: read_group: @@ -92,5 +91,3 @@ get_read_groups: - bams/Aligned.sortedByCoord.chr9_chr22.bam - bams/test_rnaseq_variant.bam - bams/test.bam - -# TODO: test read_group_to_string diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 272624876..56429b143 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -1,15 +1,68 @@ # TODO: test advanced options fastp: - - name: works - inputs: - $files: - read_one_fastq: - - fastqs/test_R1.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - read_two_fastq: - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - - null - - null \ No newline at end of file + - name: SE_trimming + inputs: + read_one_fastq: + - 
fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false \ No newline at end of file From b1f9d8fc2e58ff87263466b74dfed3487e2aafcc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 19 Mar 2026 14:27:24 -0400 Subject: [PATCH 32/47] sprocket format --- data_structures/flag_filter.wdl | 9 +- data_structures/read_group.wdl | 35 ++- tools/arriba.wdl | 75 +++++-- tools/bwa.wdl | 59 ++--- tools/deeptools.wdl | 3 +- tools/fastp.wdl | 79 ++++--- tools/fastqc.wdl | 1 - tools/fq.wdl | 26 ++- tools/gatk4.wdl | 106 +++++---- tools/htseq.wdl | 38 +++- tools/kraken2.wdl | 91 ++++---- tools/librarian.wdl | 9 +- tools/md5sum.wdl | 3 +- tools/mosdepth.wdl | 6 +- tools/ngsderive.wdl | 33 ++- tools/picard.wdl | 51 +++-- tools/qualimap.wdl | 23 +- tools/sambamba.wdl | 21 +- tools/samtools.wdl | 210 ++++++++++++------ tools/star.wdl | 64 +++--- tools/util.wdl | 34 +-- workflows/chipseq/chipseq-standard.wdl | 53 +++-- workflows/dnaseq/dnaseq-core.wdl | 47 ++-- workflows/dnaseq/dnaseq-standard-fastq.wdl | 15 +- workflows/dnaseq/dnaseq-standard.wdl | 13 +- workflows/general/alignment-post.wdl | 32 ++- workflows/general/bam-to-fastqs.wdl | 22 +- workflows/general/samtools-merge.wdl | 27 +-- workflows/methylation/methylation-cohort.wdl | 81 ++++--- .../methylation/methylation-preprocess.wdl | 6 +- .../methylation/methylation-standard.wdl | 8 +- workflows/qc/markdups-post.wdl | 6 +- workflows/qc/quality-check-standard.wdl | 163 +++++++++----- workflows/reference/bwa-db-build.wdl | 4 +- workflows/reference/gatk-reference.wdl | 20 +- workflows/reference/qc-reference.wdl | 25 ++- workflows/reference/star-db-build.wdl | 6 +- workflows/rnaseq/rnaseq-core.wdl | 36 ++- workflows/rnaseq/rnaseq-standard-fastq.wdl | 16 +- workflows/rnaseq/rnaseq-standard.wdl | 5 +- workflows/rnaseq/rnaseq-variant-calling.wdl | 12 +- 41 files changed, 934 insertions(+), 639 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 27d81a0be..263b7f456 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -58,7 +58,6 @@ ## In short, those are all flags corresponding to the quality of the read ## and them being `true` may indicate that the read is of low quality and ## should be excluded. 
- version 1.1 struct FlagFilter { @@ -127,15 +126,15 @@ workflow validate_flag_filter { } call validate_string_is_12bit_int as validate_include_if_any { input: - number = flags.include_if_any + number = flags.include_if_any, } call validate_string_is_12bit_int as validate_include_if_all { input: - number = flags.include_if_all + number = flags.include_if_all, } call validate_string_is_12bit_int as validate_exclude_if_any { input: - number = flags.exclude_if_any + number = flags.exclude_if_any, } call validate_string_is_12bit_int as validate_exclude_if_all { input: - number = flags.exclude_if_all + number = flags.exclude_if_all, } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index e0e75d715..df81982d6 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -40,7 +40,6 @@ ## } ## } ## ``` - version 1.1 #@ except: SnakeCase @@ -83,7 +82,9 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = ["SM"] + Array[String] required_fields = [ + "SM", + ] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -99,8 +100,7 @@ workflow read_group_to_string { } output { - String validated_read_group - = inner_read_group_to_string.stringified_read_group + String validated_read_group = inner_read_group_to_string.stringified_read_group } } @@ -109,7 +109,7 @@ task get_read_groups { description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct." warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)." outputs: { - read_groups: "An array of `ReadGroup` structs containing read group information." + read_groups: "An array of `ReadGroup` structs containing read group information.", } } @@ -165,8 +165,18 @@ task validate_read_group { # We have the opinion that is too permissive for ID and SM. String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ - "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", - "ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA", + "CAPILLARY", + "DNBSEQ", + "ELEMENT", + "HELICOS", + "ILLUMINA", + "IONTORRENT", + "LS454", + "ONT", + "PACBIO", + "SINGULAR", + "SOLID", + "ULTIMA", ] command <<< @@ -262,7 +272,10 @@ task validate_read_group { fi fi if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then - if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then + if [ -z "~{if defined(read_group.KS) + then read_group.KS + else "" + }" ]; then >&2 echo "KS is required" exit_code=1 fi @@ -360,7 +373,7 @@ task inner_read_group_to_string { description: "Converts a `ReadGroup` struct to a `String` **without any validation**." warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents." 
outputs: { - stringified_read_group: "Input `ReadGroup` as a string" + stringified_read_group: "Input `ReadGroup` as a string", } } @@ -377,7 +390,9 @@ task inner_read_group_to_string { Boolean format_as_sam_record } - String delimiter = if format_as_sam_record then "\\t" else " " + String delimiter = if format_as_sam_record + then "\\t" + else " " command <<< if ~{format_as_sam_record}; then diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 8ea5c8e3c..84da082c0 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://arriba.readthedocs.io/en/latest/) - version 1.1 task arriba { @@ -138,14 +137,40 @@ task arriba { File? protein_domains File? wgs_svs Array[String] interesting_contigs = [ - "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", - "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "X", + "Y", + "AC_*", + "NC_*", + ] + Array[String] viral_contigs = [ + "AC_*", + "NC_*", ] - Array[String] viral_contigs = ["AC_*", "NC_*"] Array[String] disable_filters = [] #@ except: LineWidth - String feature_name - = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" + String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" Boolean mark_duplicates = true @@ -176,10 +201,8 @@ task arriba { } Int bam_size_gb = ceil(size(bam, "GiB")) - Int disk_size_gb = bam_size_gb - + ceil(size(gtf, "GiB")) - + ceil(size(reference_fasta_gz, "GiB")) - + modify_disk_size_gb + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) + ceil(size(reference_fasta_gz, + "GiB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< @@ -198,18 +221,15 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{( - if length(interesting_contigs) > 0 + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote(interesting_contigs)) else "" )} \ - ~{( - if length(viral_contigs) > 0 + ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "" )} \ - ~{( - if length(disable_filters) > 0 + ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters)) else "" )} \ @@ -232,9 +252,18 @@ task arriba { -l ~{max_itd_length} \ -z ~{min_itd_allele_fraction} \ -Z ~{min_itd_supporting_reads} \ - ~{if mark_duplicates then "" else "-u"} \ - ~{if report_additional_columns then "-X" else ""} \ - ~{if fill_gaps then "-I" else ""} + ~{if mark_duplicates + then "" + else "-u" + } \ + ~{if report_additional_columns + then "-X" + else "" + } \ + ~{if fill_gaps + then "-I" + else "" + } >>> output { @@ -255,7 +284,7 @@ task arriba_tsv_to_vcf { meta { description: "Convert Arriba TSV format fusions to VCF format." 
outputs: { - fusions_vcf: "Output file of fusions in VCF format" + fusions_vcf: "Output file of fusions in VCF format", } } @@ -274,9 +303,7 @@ task arriba_tsv_to_vcf { } Int input_size_gb = ceil(size(fusions, "GiB")) - Int disk_size_gb = ceil(input_size_gb) - + (ceil(size(reference_fasta, "GiB")) * 3) - + modify_disk_size_gb + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GiB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -356,7 +383,7 @@ task arriba_annotate_exon_numbers { meta { description: "Annotate fusions with exon numbers." outputs: { - fusion_tsv: "TSV file with fusions annotated with exon numbers" + fusion_tsv: "TSV file with fusions annotated with exon numbers", } } diff --git a/tools/bwa.wdl b/tools/bwa.wdl index dbba3f2e7..3a7b3cb0d 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/lh3/bwa) - version 1.1 task bwa_aln { meta { description: "Maps Single-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -36,9 +35,7 @@ task bwa_aln { File fastq File bwa_db_tar_gz String read_group - String prefix = sub( - basename(fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean use_all_cores = false @@ -50,8 +47,7 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GiB") Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -98,7 +94,7 @@ task bwa_aln_pe { meta { description: "Maps Paired-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -132,11 +128,8 @@ task bwa_aln_pe { File read_two_fastq_gz File bwa_db_tar_gz String read_group - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -144,12 +137,10 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = ( - size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB") - ) + Float input_fastq_size = (size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + )) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb ) command <<< @@ -202,7 +193,7 @@ task bwa_mem { meta { description: "Maps FASTQ files to BAM format using bwa mem" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -230,11 +221,8 @@ task bwa_mem { File bwa_db_tar_gz String read_group File? 
read_two_fastq_gz - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -242,11 +230,10 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GiB") - + size(read_two_fastq_gz, "GiB") + Float input_fastq_size = size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + ) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -271,9 +258,10 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{( - if defined(read_two_fastq_gz) - then "'" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "'" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ @@ -281,9 +269,10 @@ task bwa_mem { rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{( - if defined(read_two_fastq_gz) - then "rm '" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "rm '" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} >>> @@ -305,7 +294,7 @@ task build_bwa_db { meta { description: "Creates a BWA index and returns it as a compressed tar archive" outputs: { - bwa_db_tar_gz: "Tarballed bwa reference files" + bwa_db_tar_gz: "Tarballed bwa reference files", } } diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl index 91b5e1b06..d30738abc 100755 --- a/tools/deeptools.wdl +++ b/tools/deeptools.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://deeptools.readthedocs.io/en/develop/index.html) - version 1.1 task bam_coverage { meta { description: "Generates a BigWig coverage track using bamCoverage from DeepTools" outputs: { - bigwig: "BigWig format coverage file" + bigwig: "BigWig format coverage file", } } diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 42b78e34e..64d1ea545 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -95,9 +95,7 @@ task fastp { input { File read_one_fastq File? 
read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".trimmed" Boolean output_fastq = true @@ -114,7 +112,9 @@ task fastp { Boolean phred64 = false Boolean use_all_cores = false Int first_n_reads = 0 - Int duplicate_accuracy = if deduplicate then 3 else 1 + Int duplicate_accuracy = if deduplicate + then 3 + else 1 Int n_base_limit = 5 Int qualified_quality = 15 Int unqualified_percent = 40 @@ -148,9 +148,9 @@ task fastp { Float input_size = size(read_one_fastq, "GB") + size(read_two_fastq, "GB") Int disk_size_gb = ceil(input_size) * 2 + 10 + modify_disk_size_gb - command <<< + command <<< set -euo pipefail - + n_cores=~{ncpu} if ~{use_all_cores}; then n_cores=$(nproc) @@ -159,41 +159,71 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{( - if output_fastq - then "-o '" + ( - if defined(read_two_fastq) + ~{(if output_fastq + then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" else "~{prefix}.fastq.gz" ) + "'" else "" )} \ - ~{( - if (defined(read_two_fastq) && output_fastq) + ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" else "" )} \ --reads_to_process ~{first_n_reads} \ - ~{if deduplicate then "--dedup" else ""} \ + ~{if deduplicate + then "--dedup" + else "" + } \ --dup_calc_accuracy ~{duplicate_accuracy} \ - ~{if disable_duplicate_eval then "--dont_eval_duplication" else ""} \ - ~{if phred64 then "--phred64" else ""} \ - ~{if disable_quality_filter then "--disable_quality_filtering" else ""} \ + ~{if disable_duplicate_eval + then "--dont_eval_duplication" + else "" + } \ + ~{if phred64 + then "--phred64" + else "" + } \ + ~{if disable_quality_filter + then "--disable_quality_filtering" + else "" + } \ -n ~{n_base_limit} \ -q ~{qualified_quality} \ -u ~{unqualified_percent} \ -e ~{average_quality} \ - ~{if disable_length_filter then "--disable_length_filtering" else ""} \ + ~{if disable_length_filter + then "--disable_length_filtering" + else "" + } \ -l ~{length_required} \ --length_limit ~{length_limit} \ - ~{if enable_complexity_filter then "-y" else ""} \ + ~{if enable_complexity_filter + then "-y" + else "" + } \ -Y ~{complexity_threshold} \ - ~{if enable_overrepresentation_eval then "-p" else ""} \ + ~{if enable_overrepresentation_eval + then "-p" + else "" + } \ -P ~{overrepresentation_sampling} \ - ~{if disable_adapter_trimming then "--disable_adapter_trimming" else ""} \ - ~{if enable_pe_adapter_trimming then "-2" else ""} \ - ~{if allow_gap_overlap_trimming then "--allow_gap_overlap_trimming" else ""} \ - ~{if enable_base_correction then "-c" else ""} \ + ~{if disable_adapter_trimming + then "--disable_adapter_trimming" + else "" + } \ + ~{if enable_pe_adapter_trimming + then "-2" + else "" + } \ + ~{if allow_gap_overlap_trimming + then "--allow_gap_overlap_trimming" + else "" + } \ + ~{if enable_base_correction + then "-c" + else "" + } \ --overlap_len_require ~{overlap_len_require} \ --overlap_diff_limit ~{overlap_diff_limit} \ --overlap_diff_percent_limit ~{overlap_diff_percent_limit} \ @@ -219,8 +249,7 @@ task fastp { runtime { cpu: ncpu - memory: ( - if disable_duplicate_eval + memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[duplicate_accuracy] ) diff --git a/tools/fastqc.wdl 
b/tools/fastqc.wdl index cf6fe238c..06aee4034 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - version 1.1 task fastqc { diff --git a/tools/fq.wdl b/tools/fq.wdl index e5eab4763..b353597f2 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjude-rust-labs/fq) - version 1.1 task fqlint { @@ -67,9 +66,7 @@ task fqlint { Float read1_size = size(read_one_fastq, "GiB") Float read2_size = size(read_two_fastq, "GiB") - Int memory_gb = ( - ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb - ) + Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb) Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb @@ -78,7 +75,10 @@ task fqlint { ~{sep(" ", prefix("--disable-validator ", squote(disable_validator_codes)))} \ --single-read-validation-level "~{single_read_validation_level}" \ --paired-read-validation-level "~{paired_read_validation_level}" \ - --lint-mode ~{if panic then "panic" else "log"} \ + --lint-mode ~{if panic + then "panic" + else "log" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> @@ -123,9 +123,7 @@ task subsample { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Float probability = 1.0 @@ -138,12 +136,13 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = ( - if (probability < 1.0 && probability > 0) + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "" ) - String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" + String record_count_arg = if (record_count > 0) + then "-n ~{record_count}" + else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" String r2_dst = prefix + ".R2.subsampled.fastq.gz" @@ -154,7 +153,10 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ + ~{if defined(read_two_fastq) + then "--r2-dst '" + r2_dst + "'" + else "" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index fe0bac91f..e4a709e18 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://software.broadinstitute.org/gatk) - version 1.1 task split_n_cigar_reads { @@ -13,7 +12,7 @@ task split_n_cigar_reads { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file to with unsplit reads containing Ns in their CIGAR strings." bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format. Must be uncompressed." 
@@ -37,23 +36,21 @@ task split_n_cigar_reads { Int ncpu = 8 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< - set -euo pipefail - - gatk \ - --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ - SplitNCigarReads \ - -R "~{fasta}" \ - -I "~{bam}" \ - -O "~{prefix}.bam" \ - -OBM true - # GATK is unreasonable and uses the plain ".bai" suffix. - mv "~{prefix}.bai" "~{prefix}.bam.bai" + set -euo pipefail + + gatk \ + --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ + SplitNCigarReads \ + -R "~{fasta}" \ + -I "~{bam}" \ + -O "~{prefix}.bam" \ + -OBM true + # GATK is unreasonable and uses the plain ".bai" suffix. + mv "~{prefix}.bai" "~{prefix}.bam.bai" >>> output { @@ -76,11 +73,11 @@ task base_recalibrator { description: "Generates recalibration report for base quality score recalibration." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360036897372-BaseRecalibratorSpark-BETA" outputs: { - recalibration_report: "Recalibration report file" + recalibration_report: "Recalibration report file", } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to recabilbrate base quality scores" bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format" @@ -114,23 +111,22 @@ task base_recalibrator { Int memory_gb = 25 Int modify_disk_size_gb = 0 Int ncpu = 4 - } + } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ + java_heap_size + }g" \ BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{( - if use_original_quality_scores + ~{(if use_original_quality_scores then "--use-original-qualities" else "" )} \ @@ -163,7 +159,7 @@ task apply_bqsr { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to apply base quality score recalibration" bam_index: "BAM index file corresponding to the input BAM" recalibration_report: "Recalibration report file" @@ -194,11 +190,16 @@ task apply_bqsr { # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ + java_heap_size + }g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ - ~{if use_original_quality_scores then "--use-original-qualities" else "" } \ + ~{if use_original_quality_scores + then "--use-original-qualities" + else "" + } \ -O "~{prefix}.bqsr.bam" \ --bqsr-recal-file "~{recalibration_report}" >>> @@ -227,7 +228,7 @@ task haplotype_caller { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to call variants" bam_index: "BAM index file corresponding to the input BAM" interval_list: { @@ -269,10 +270,7 @@ task haplotype_caller { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") * 2) - + 30 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) 
+ modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth @@ -284,7 +282,10 @@ task haplotype_caller { -I "~{bam}" \ -L "~{interval_list}" \ -O "~{prefix}.vcf.gz" \ - ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \ + ~{if use_soft_clipped_bases + then "" + else "--dont-use-soft-clipped-bases" + } \ --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \ --dbsnp "~{dbSNP_vcf}" >>> @@ -313,7 +314,7 @@ task variant_filtration { } } - parameter_meta { + parameter_meta { vcf: "Input VCF format file to filter" vcf_index: "VCF index file corresponding to the input VCF" fasta: "Reference genome in FASTA format" @@ -340,8 +341,14 @@ task variant_filtration { File fasta File fasta_index File dict - Array[String] filter_names = ["FS", "QD"] - Array[String] filter_expressions = ["FS > 30.0", "QD < 2.0"] + Array[String] filter_names = [ + "FS", + "QD", + ] + Array[String] filter_expressions = [ + "FS > 30.0", + "QD < 2.0", + ] String prefix = basename(vcf, ".vcf.gz") Int cluster = 3 Int window = 35 @@ -377,7 +384,7 @@ task variant_filtration { } task mark_duplicates_spark { - meta { + meta { description: "Marks duplicate reads in the input BAM file using GATK's Spark implementation of Picard's MarkDuplicates." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832682540699-MarkDuplicatesSpark" outputs: { @@ -427,7 +434,7 @@ task mark_duplicates_spark { group: "Common", } optical_distance: { - description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", + description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", help: "Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). 
Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines.", warning: "Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`.", } @@ -452,13 +459,10 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -470,12 +474,16 @@ task mark_duplicates_spark { --java-options "-Xmx~{java_heap_size}g" \ -I "~{bam}" \ -M "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --create-output-bam-index ~{create_bam} \ --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ - --read-name-regex '~{ - if (optical_distance > 0) then read_name_regex else "null" + --read-name-regex '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ diff --git a/tools/htseq.wdl b/tools/htseq.wdl index bcaca9ce0..cb8b16014 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/htseq/htseq) - version 1.1 task count { @@ -9,7 +8,7 @@ task count { feature_counts: { description: "A two column TSV file. First column is feature names and second column is counts.", help: "Presence of a header is determined by the `include_custom_header` parameter.", - } + }, } } @@ -89,10 +88,14 @@ task count { Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") - Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb + Int memory_gb = (if pos_sorted + then ceil(bam_size) + 4 + else 4 + ) + modify_memory_gb - Int disk_size_gb = ceil( - (bam_size + gtf_size) * if pos_sorted then 4 else 1 + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted + then 4 + else 1 ) + 10 + modify_disk_size_gb command <<< @@ -105,16 +108,24 @@ task count { # 9223372036854776000 == max 64 bit Float htseq-count -f bam \ --max-reads-in-buffer 9223372036854776000 \ - -r ~{if pos_sorted then "pos" else "name"} \ + -r ~{if pos_sorted + then "pos" + else "name" + } \ -s "~{strandedness}" \ -a ~{minaqual} \ -t "~{feature_type}" \ -m "~{mode}" \ -i "~{idattr}" \ - --nonunique ~{if nonunique then "all" else "none"} \ - --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ - --supplementary-alignments ~{( - if supplementary_alignments + --nonunique ~{if nonunique + then "all" + else "none" + } \ + --secondary-alignments ~{if secondary_alignments + then "score" + else "ignore" + } \ + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" )} \ @@ -139,7 +150,7 @@ task calc_tpm { meta { description: "Given a feature counts file and a feature lengths file, calculate Transcripts Per Million (TPM)" outputs: { - tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file." + tpm_file: "Transcripts Per Million (TPM) file. 
A two column headered TSV file.", } } @@ -171,7 +182,10 @@ task calc_tpm { "~{counts}" \ "~{feature_lengths}" \ "~{outfile_name}" \ - ~{if has_header then "--counts_has_header" else ""} + ~{if has_header + then "--counts_has_header" + else "" + } >>> output { diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index b501ffa48..5d2081918 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/DerrickWood/kraken2) - version 1.1 task download_taxonomy { @@ -9,7 +8,7 @@ task download_taxonomy { taxonomy: { description: "The NCBI taxonomy, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -27,7 +26,10 @@ task download_taxonomy { set -euo pipefail kraken2-build --download-taxonomy \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -58,7 +60,7 @@ task download_library { library: { description: "A library of reference genomes, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -97,21 +99,24 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" #@ except: ExpressionSpacing - Int disk_size_gb = ( - ( - if library_name == "bacteria" then 300 - else if library_name == "nr" then 600 - else if library_name == "nt" then 2500 - else 25 - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if library_name == "bacteria" + then 300 + else if library_name == "nr" + then 600 + else if library_name == "nt" + then 2500 + else 25 + ) + modify_disk_size_gb) command <<< set -euo pipefail kraken2-build --download-library \ "~{library_name}" \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -140,7 +145,7 @@ task create_library_from_fastas { custom_library: { description: "Kraken2 compatible library, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -172,7 +177,10 @@ task create_library_from_fastas { while read -r fasta; do gunzip -c "$fasta" > tmp.fa kraken2-build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --add-to-library tmp.fa \ --db "~{db_name}" done < fastas.txt @@ -200,7 +208,7 @@ task build_db { meta { description: "Builds a custom Kraken2 database" outputs: { - built_db: "A complete Kraken2 database" + built_db: "A complete Kraken2 database", } } @@ -238,9 +246,15 @@ task build_db { String db_name = "kraken2_db" Boolean protein = false Boolean use_all_cores = false - Int kmer_len = if protein then 15 else 35 - Int minimizer_len = if protein then 12 else 31 - Int minimizer_spaces = if protein then 0 else 7 + Int kmer_len = if protein + then 15 + else 35 + Int minimizer_len = if protein + then 12 + else 31 + Int minimizer_spaces = if protein + then 0 + else 7 Int max_db_size_gb = -1 Int ncpu = 4 Int modify_memory_gb = 0 @@ -249,13 +263,10 @@ task build_db { Float tarballs_size = size(tarballs, "GiB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ( - ( - if (max_db_size_gb > 0) - then ceil(max_db_size_gb * 1.2) - 
else ceil(tarballs_size * 2) - ) + modify_memory_gb - ) + Int memory_gb = ((if (max_db_size_gb > 0) + then ceil(max_db_size_gb * 1.2) + else ceil(tarballs_size * 2) + ) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -277,12 +288,14 @@ task build_db { >&2 echo "*** start DB build ***" kraken2-build --build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{( - if (max_db_size_gb > 0) + ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" else "" )} \ @@ -359,9 +372,7 @@ task kraken { File read_two_fastq_gz #@ except: InputName File db - String prefix = sub( - basename(read_one_fastq_gz), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean store_sequences = false @@ -376,11 +387,9 @@ task kraken { Float db_size = size(db, "GiB") Float read1_size = size(read_one_fastq_gz, "GiB") Float read2_size = size(read_two_fastq_gz, "GiB") - Int disk_size_gb_calculation = ( - ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb + Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = ( - if store_sequences + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation ) @@ -403,12 +412,18 @@ task kraken { kraken2 --db kraken2_db/ \ --paired \ - --output ~{if store_sequences then "'" + out_sequences + "'" else "-"} \ + --output ~{if store_sequences + then "'" + out_sequences + "'" + else "-" + } \ --threads "$n_cores" \ --minimum-base-quality ~{min_base_quality} \ --report "~{out_report}" \ --report-zero-counts \ - ~{if use_names then "--use-names" else ""} \ + ~{if use_names + then "--use-names" + else "" + } \ "~{read_one_fastq_gz}" \ "~{read_two_fastq_gz}" diff --git a/tools/librarian.wdl b/tools/librarian.wdl index faa136618..88b2fd078 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -1,5 +1,4 @@ ## # librarian - version 1.1 task librarian { @@ -24,18 +23,14 @@ task librarian { input { File read_one_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".librarian" Int modify_disk_size_gb = 0 } Float read1_size = size(read_one_fastq, "GiB") - Int disk_size_gb = ( - ceil(read1_size) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/md5sum.wdl b/tools/md5sum.wdl index e967e55c3..1e79260a0 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/coreutils/coreutils) - version 1.1 task compute_checksum { meta { description: "Generates an MD5 checksum for the input file" outputs: { - md5sum: "STDOUT of the `md5sum` command that has been redirected to a file" + md5sum: "STDOUT of the `md5sum` command that has been redirected to a file", } } diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl 
b/tools/mosdepth.wdl
index 69b81d1ac..fdf4775c9 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/brentp/mosdepth) - version 1.1 task coverage { @@ -53,7 +52,10 @@ task coverage { -n \ ~{"-b '" + coverage_bed + "'"} \ -Q ~{min_mapping_quality} \ - ~{if (use_fast_mode) then "-x" else ""} \ + ~{if (use_fast_mode) + then "-x" + else "" + } \ "~{prefix}" \ "$CWD_BAM" diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 3abaac343..72cb92ced 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjudecloud/ngsderive) - version 1.1 task strandedness { @@ -61,7 +60,10 @@ task strandedness { ln -s "~{gene_model}" "$CWD_GFF" ngsderive strandedness --verbose \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ -m ~{min_reads_per_gene} \ -n ~{num_genes} \ -q ~{min_mapq} \ @@ -398,21 +400,30 @@ task endedness { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if calc_rpt - then ( - ceil(bam_size * 2.5) + 4 + modify_memory_gb - ) + Int memory_gb = (if calc_rpt + then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< ngsderive endedness --verbose \ - ~{if lenient then "--lenient" else ""} \ - ~{if calc_rpt then "-r" else ""} \ - ~{if round_rpt then "--round-rpt" else ""} \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if lenient + then "--lenient" + else "" + } \ + ~{if calc_rpt + then "-r" + else "" + } \ + ~{if round_rpt + then "--round-rpt" + else "" + } \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ --paired-deviance ~{paired_deviance} \ -n ~{num_reads} \ "~{bam}" \ diff --git a/tools/picard.wdl b/tools/picard.wdl index 8f35947d1..3c8f7d4ce 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://broadinstitute.github.io/picard/) - version 1.1 task mark_duplicates { @@ -84,13 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -100,13 +96,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{ - if (optical_distance > 0) then read_name_regex else "null" + --READ_NAME_REGEX '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,9 +194,10 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = ( - if (index_validation_stringency_less_exhaustive) + String mode_arg = if (summary_mode) + then "--MODE SUMMARY" + else "" + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" ) 
@@ -418,8 +419,10 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime{ - cpu: if threading then 2 else 1 + runtime { + cpu: if threading + then 2 + else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -504,7 +507,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - } + }, } } @@ -851,8 +854,7 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{( - if paired + ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" )} \ @@ -860,7 +862,10 @@ task bam_to_fastq { VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired then "'" + prefix + ".R2.fastq'" else ""} + ~{if paired + then "'" + prefix + ".R2.fastq'" + else "" + } >>> output { @@ -868,7 +873,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime{ + runtime { memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -932,7 +937,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -999,7 +1004,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", } } diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 67cd89d14..4aec8447d 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://qualimap.bioinfo.cipf.es/) - version 1.1 task rnaseq { @@ -41,8 +40,12 @@ task rnaseq { } String out_tar_gz = prefix + ".tar.gz" - String name_sorted_arg = if (name_sorted) then "-s" else "" - String paired_end_arg = if (paired_end) then "-pe" else "" + String name_sorted_arg = if (name_sorted) + then "-s" + else "" + String paired_end_arg = if (paired_end) + then "-pe" + else "" Int java_heap_size = ceil(memory_gb * 0.9) Float bam_size = size(bam, "GiB") @@ -50,13 +53,10 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. 
- Int disk_size_gb = ( - ( - if name_sorted - then ceil(bam_size + gtf_size + 15) - else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if name_sorted + then ceil(bam_size + gtf_size + 15) + else ceil(((bam_size + gtf_size) * 12) + 10) + ) + modify_disk_size_gb) command <<< set -euo pipefail @@ -81,8 +81,7 @@ task rnaseq { output { File raw_summary = "~{prefix}/rnaseq_qc_results.txt" - File raw_coverage - = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" + File raw_coverage = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" File results = out_tar_gz } diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index 726b4714f..ac1fe349b 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://lomereiter.github.io/sambamba/) - version 1.1 task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -63,7 +62,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -123,7 +122,7 @@ task sort { meta { description: "Sorts the input BAM file" outputs: { - sorted_bam: "The input BAM after it has been sorted according to `sort_order`" + sorted_bam: "The input BAM after it has been sorted according to `sort_order`", } } @@ -157,7 +156,10 @@ task sort { sambamba sort \ --nthreads ~{ncpu} \ -o "~{outfile_name}" \ - ~{if queryname_sort then "-n" else ""} \ + ~{if queryname_sort + then "-n" + else "" + } \ "~{bam}" >>> @@ -209,7 +211,10 @@ task markdup { command <<< sambamba markdup \ --nthreads ~{ncpu} \ - ~{if remove_duplicates then "--remove-duplicates" else ""} \ + ~{if remove_duplicates + then "--remove-duplicates" + else "" + } \ "~{bam}" \ "~{prefix}.markdup.bam" \ > "~{prefix}.markdup_log.txt" @@ -234,7 +239,7 @@ task flagstat { meta { description: "Produces a report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`sambamba flagstat` STDOUT redirected to a file" + flagstat_report: "`sambamba flagstat` STDOUT redirected to a file", } } @@ -275,7 +280,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 21a8cfd46..b75864496 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://samtools.sourceforge.net/) - version 1.1 import "../data_structures/flag_filter.wdl" @@ -38,7 +37,7 @@ task split { meta { description: "Runs Samtools split on the input BAM file. This splits the BAM by read group into one or more output files." outputs: { - split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`." + split_bams: "The split BAM files. 
The extensions will contain read group IDs, and will end in `.bam`.", } } @@ -131,12 +130,12 @@ task split { rm first_read.sam done fi - + exit $EXITCODE >>> output { - Array[File] split_bams = glob("*.bam") + Array[File] split_bams = glob("*.bam") } runtime { @@ -152,7 +151,7 @@ task flagstat { meta { description: "Produces a `samtools flagstat` report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`samtools flagstat` STDOUT redirected to a file" + flagstat_report: "`samtools flagstat` STDOUT redirected to a file", } } @@ -195,7 +194,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { @@ -210,7 +209,7 @@ task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -397,7 +396,6 @@ task subsample { fi rm first_read.sam fi - >>> output { @@ -419,7 +417,7 @@ task filter { description: "Filters a BAM based on its bitwise flag value." help: "This task is a wrapper around `samtools view`. This task will fail if there are no reads in the output BAM. This can happen either because the input BAM was empty or because the supplied `bitwise_filter` was too strict. If you want to down-sample a BAM, use the `subsample` task instead." outputs: { - filtered_bam: "BAM file that has been filtered based on the input flags" + filtered_bam: "BAM file that has been filtered based on the input flags", } } @@ -505,7 +503,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -584,11 +582,26 @@ task merge { samtools merge \ --threads "$n_cores" \ ~{"-h \"" + new_header + "\""} \ - ~{if name_sorted then "-n" else ""} \ - ~{if (region != "") then "-R \"" + region + "\"" else ""} \ - ~{if attach_rg then "-r" else ""} \ - ~{if combine_rg then "-c" else ""} \ - ~{if combine_pg then "-p" else ""} \ + ~{if name_sorted + then "-n" + else "" + } \ + ~{if (region != "") + then "-R \"" + region + "\"" + else "" + } \ + ~{if attach_rg + then "-r" + else "" + } \ + ~{if combine_rg + then "-c" + else "" + } \ + ~{if combine_pg + then "-p" + else "" + } \ "~{prefix}.bam" \ "${bams[@]}" @@ -613,7 +626,7 @@ task addreplacerg { meta { description: "Adds or replaces read group tags" outputs: { - tagged_bam: "The transformed input BAM after read group modifications have been applied" + tagged_bam: "The transformed input BAM after read group modifications have been applied", } } @@ -677,8 +690,14 @@ task addreplacerg { --threads "$n_cores" \ ~{sep(" ", prefix("-r ", squote(read_group_line)))} \ ~{"-R \"" + read_group_id + "\""} \ - -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \ - ~{if overwrite_header_record then "-w" else ""} \ + -m ~{if orphan_only + then "orphan_only" + else "overwrite_all" + } \ + ~{if overwrite_header_record + then "-w" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -700,7 +719,7 @@ task collate { meta { description: "Runs `samtools collate` on the input BAM file. Shuffles and groups reads together by their names." 
outputs: { - collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)" + collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)", } } @@ -751,7 +770,10 @@ task collate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -854,13 +876,12 @@ task bam_to_fastq { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if (collated || !paired_end) + Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4) ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * ( - if (retain_collated_bam && !collated && paired_end) + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end + ) then 5 else 2 )) + 10 + modify_disk_size_gb @@ -878,13 +899,18 @@ task bam_to_fastq { mkfifo bam_pipe if ! ~{collated} && ~{paired_end}; then samtools collate \ - ~{if retain_collated_bam then "" else "-u"} \ + ~{if retain_collated_bam + then "" + else "-u" + } \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -O \ "~{bam}" \ - | tee ~{( - if retain_collated_bam + | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" else "" )} \ @@ -900,32 +926,26 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{( - if append_read_number + ~{(if append_read_number then "-N" else "-n" )} \ - -1 ~{( - if paired_end + -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - -2 ~{( - if paired_end + -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - ~{( - if paired_end - then ( - if output_singletons + ~{(if paired_end + then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz" ) else "" )} \ - -0 ~{( - if paired_end + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + ".fastq.gz\"" )} \ @@ -971,7 +991,7 @@ task fixmate { description: "Runs `samtools fixmate` on the input BAM file. This fills in mate coordinates and insert size fields among other tags and fields." warning: "This task assumes a name-sorted or name-collated input BAM. If you have a position-sorted BAM, please use the `position_sorted_fixmate` task." outputs: { - fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM" + fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM", } } @@ -1042,11 +1062,26 @@ task fixmate { samtools fixmate \ --threads "$n_cores" \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ "~{bam}" \ "~{prefix}~{extension}" >>> @@ -1070,7 +1105,7 @@ task position_sorted_fixmate { warning: "If you already have a collated BAM, please use the `fixmate` task." 
help: "`fixmate` fills in mate coordinates and insert size fields among other tags and fields. This task collates the input BAM, runs `fixmate`, and then resorts the output into a position-sorted BAM." outputs: { - fixmate_bam: "BAM file with mate information added" + fixmate_bam: "BAM file with mate information added", } } @@ -1137,18 +1172,36 @@ task position_sorted_fixmate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -u \ -O \ "~{bam}" \ | samtools fixmate \ --threads "$n_cores" \ -u \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ - \ - \ | samtools sort \ @@ -1279,25 +1332,54 @@ task markdup { samtools markdup \ --threads "$n_cores" \ - -f "~{prefix + if json then ".json" else ".txt"}" \ + -f "~{prefix + if json + then ".json" + else ".txt" + }" \ --read-coords '~{read_coords_regex}' \ --coords-order "~{coordinates_order}" \ - ~{if remove_duplicates then "-r" else ""} \ - ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \ - ~{if mark_duplicates_with_do_tag then "-t" else ""} \ - ~{if duplicate_count then "--duplicate-count" else ""} \ - ~{if include_qc_fails then "--include-fails" else ""} \ - ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \ - ~{if use_read_groups then "--use-read-groups" else ""} \ + ~{if remove_duplicates + then "-r" + else "" + } \ + ~{if mark_supp_or_sec_or_unmapped_as_duplicates + then "-S" + else "" + } \ + ~{if mark_duplicates_with_do_tag + then "-t" + else "" + } \ + ~{if duplicate_count + then "--duplicate-count" + else "" + } \ + ~{if include_qc_fails + then "--include-fails" + else "" + } \ + ~{if duplicates_of_duplicates_check + then "" + else "--no-multi-dup" + } \ + ~{if use_read_groups + then "--use-read-groups" + else "" + } \ -l ~{max_readlen} \ -d ~{optical_distance} \ -c \ "~{bam}" \ - "~{if create_bam then prefix + ".bam" else "/dev/null"}" + "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" >>> output { - File markdup_report = prefix + if json then ".json" else ".txt" + File markdup_report = prefix + if json + then ".json" + else ".txt" File? markdup_bam = prefix + ".bam" } @@ -1314,7 +1396,7 @@ task faidx { meta { description: "Creates a `.fai` FASTA index for the input FASTA" outputs: { - fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`." + fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`.", } } diff --git a/tools/star.wdl b/tools/star.wdl index 47fd141e7..8b77ced1b 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/alexdobin/STAR) - version 1.1 task build_star_db { meta { description: "Runs STAR's build command to generate a STAR format reference for alignment" outputs: { - star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task." 
+ star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task.", } } @@ -86,8 +85,7 @@ task build_star_db { Float reference_fasta_size = size(reference_fasta, "GiB") Float gtf_size = size(gtf, "GiB") - Int disk_size_gb = ( - ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb ) # Leave 2GB as system overhead @@ -558,7 +556,11 @@ task alignment { Array[File] read_one_fastqs_gz Array[String] read_groups Array[File]? read_two_fastqs_gz - Array[Int] out_sj_filter_intron_max_vs_read_n = [50000, 100000, 200000] + Array[Int] out_sj_filter_intron_max_vs_read_n = [ + 50000, + 100000, + 200000, + ] SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { noncanonical_motifs: 30, GT_AG_and_CT_AC_motif: 12, @@ -595,9 +597,7 @@ task alignment { Pair[Int, Int] clip_3p_n_bases = (0, 0) Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0) Pair[Int, Int] clip_5p_n_bases = (0, 0) - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String read_name_separator = "/" @@ -699,16 +699,16 @@ task alignment { Int modify_disk_size_gb = 0 } - Array[File] read_twos = select_first([read_two_fastqs_gz, []]) + Array[File] read_twos = select_first([ + read_two_fastqs_gz, + [], + ]) Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB") Float read_two_fastqs_size = size(read_twos, "GiB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GiB") - Int disk_size_gb = ( - ( - ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3 - ) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size + ) * 3) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -733,9 +733,9 @@ task alignment { --outFileNamePrefix "~{prefix + "."}" \ --twopassMode "~{twopass_mode}" \ --outSAMattrRGline ~{sep(" , ", read_groups)} \ - --outSJfilterIntronMaxVsReadN ~{ - sep(" ", quote(out_sj_filter_intron_max_vs_read_n)) - } \ + --outSJfilterIntronMaxVsReadN ~{sep(" ", quote( + out_sj_filter_intron_max_vs_read_n + ))} \ --outSJfilterOverhangMin ~{sep(" ", quote([ out_sj_filter_overhang_min.noncanonical_motifs, out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif, @@ -766,33 +766,31 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{( - if (length(read_twos) != 0) + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 + ) then "'" + clip_3p_adapter_seq.right + "'" else "" )} \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{( - if (length(read_twos) != 0) + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) then clip_3p_adapter_mmp.right else None )} \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{( - if (length(read_twos) != 0) + --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 + ) then align_ends_protrude.right else None )}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if 
(length(read_twos) != 0) then clip_3p_n_bases.right else None )} \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( + read_twos + ) != 0) then clip_3p_after_adapter_n_bases.right else None )} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right else None )} \ @@ -803,11 +801,11 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{( - if (out_sam_tlen == "left_plus") + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" - else ( - if (out_sam_tlen == "left_any") then "2" else "error" + else (if (out_sam_tlen == "left_any") + then "2" + else "error" ) )} \ --outFilterType "~{out_filter_type}" \ diff --git a/tools/util.wdl b/tools/util.wdl index 3bb4c5963..be141df2c 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -1,12 +1,11 @@ ## # Utilities - version 1.1 task download { meta { description: "Uses wget to download a file from a remote URL to the local filesystem" outputs: { - downloaded_file: "File downloaded from provided URL" + downloaded_file: "File downloaded from provided URL", } } @@ -53,7 +52,7 @@ task split_string { description: "Split a string into an array of strings based on a delimiter" warning: "This implementation will result in a runtime error if the provided string has any embedded single quotes (`'`)!" outputs: { - split_strings: "Split string as an array" + split_strings: "Split string as an array", } } @@ -90,7 +89,7 @@ task calc_feature_lengths { description: "Calculate feature lengths from a GTF file using the non-overlapping exonic length algorithm" help: "The non-overlapping exonic length algorithm can be implemented as the sum of each base covered by at least one exon; where each base is given a value of 1 regardless of how many exons overlap it." outputs: { - feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column" + feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column", } } @@ -166,7 +165,7 @@ task add_to_bam_header { meta { description: "Adds another line of text to the bottom of a BAM header" outputs: { - reheadered_bam: "The BAM after its header has been modified" + reheadered_bam: "The BAM after its header has been modified", } } @@ -215,7 +214,7 @@ task unpack_tarball { meta { description: "Accepts a `.tar.gz` archive and converts it into a flat array of files. Any directory structure of the archive is ignored." 
outputs: { - tarball_contents: "An array of files found in the input tarball" + tarball_contents: "An array of files found in the input tarball", } } @@ -314,7 +313,7 @@ task global_phred_scores { meta { description: "Calculates statistics about PHRED scores of the input BAM" outputs: { - phred_scores: "Headered TSV file containing PHRED score statistics" + phred_scores: "Headered TSV file containing PHRED score statistics", } } @@ -339,7 +338,10 @@ task global_phred_scores { command <<< python3 /scripts/util/calc_global_phred_scores.py \ - ~{if fast_mode then "--fast_mode" else ""} \ + ~{if fast_mode + then "--fast_mode" + else "" + } \ "~{bam}" \ "~{prefix}" >>> @@ -384,13 +386,15 @@ task check_fastq_and_rg_concordance { Array[String]? read_two_names } - Array[String] read_twos = select_first([read_two_names, []]) + Array[String] read_twos = select_first([ + read_two_names, + [], + ]) command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{( - if length(read_twos) > 0 + ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" else "" )} \ @@ -407,7 +411,7 @@ task split_fastq { meta { description: "Splits a FASTQ into multiple files based on the number of reads per file" outputs: { - fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ" + fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ", } } @@ -427,11 +431,7 @@ task split_fastq { input { File fastq - String prefix = sub( - basename(fastq), - "(fastq|fq)\\.gz$", - "" - ) + String prefix = sub(basename(fastq), "(fastq|fq)\\.gz$", "") Int reads_per_file = 10000000 Int modify_disk_size_gb = 0 Int ncpu = 2 diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index d9f9290df..aa30759fe 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -9,11 +9,14 @@ import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map +import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" + as seaseq_map #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" + as seaseq_samtools #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" + as seaseq_util workflow chipseq_standard_experimental { meta { @@ -67,7 +70,10 @@ workflow chipseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, @@ -79,7 +85,7 @@ workflow chipseq_standard_experimental { use_all_cores, } - scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){ + scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) { if (enable_read_trimming) { call fp.fastp as trim { input: read_one_fastq = pair.left, @@ -93,10 +99,13 @@ workflow chipseq_standard_experimental { } } - File chosen_fastq = 
select_first([trim.single_end_reads_fastq_gz, pair.left]) + File chosen_fastq = select_first([ + trim.single_end_reads_fastq_gz, + pair.left, + ]) call seaseq_util.basicfastqstats as basic_stats { input: - fastqfile = chosen_fastq + fastqfile = chosen_fastq, } call seaseq_map.mapping as bowtie_single_end_mapping { input: fastqfile = chosen_fastq, @@ -104,13 +113,11 @@ workflow chipseq_standard_experimental { metricsfile = basic_stats.metrics_out, blacklist = excludelist, } - File chosen_bam = select_first( - [ - bowtie_single_end_mapping.bklist_bam, - bowtie_single_end_mapping.mkdup_bam, - bowtie_single_end_mapping.sorted_bam, - ] - ) + File chosen_bam = select_first([ + bowtie_single_end_mapping.bklist_bam, + bowtie_single_end_mapping.mkdup_bam, + bowtie_single_end_mapping.sorted_bam, + ]) call read_group.read_group_to_string { input: read_group = pair.right, @@ -127,7 +134,7 @@ workflow chipseq_standard_experimental { } Array[File] aligned_bams = addreplacerg.tagged_bam - scatter(aligned_bam in aligned_bams){ + scatter (aligned_bam in aligned_bams) { call picard.clean_sam as picard_clean { input: bam = aligned_bam, } @@ -147,7 +154,9 @@ workflow chipseq_standard_experimental { use_all_cores, } #@ except: UnusedCall - call picard.validate_bam { input: bam = markdup.mkdupbam } + call picard.validate_bam { input: + bam = markdup.mkdupbam, + } call md5sum.compute_checksum { input: file = markdup.mkdupbam, @@ -164,9 +173,13 @@ workflow chipseq_standard_experimental { File bam_checksum = compute_checksum.md5sum File bam_index = samtools_index.bam_index File bigwig = deeptools_bam_coverage.bigwig - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl index 62b027db2..21a1d3990 100644 --- a/workflows/dnaseq/dnaseq-core.wdl +++ b/workflows/dnaseq/dnaseq-core.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! 
- version 1.1 import "../../tools/bwa.wdl" @@ -69,10 +68,7 @@ workflow dnaseq_core_experimental { read_groups, } - scatter (tuple in zip( - zip(read_one_fastqs_gz, read_two_fastqs_gz), - read_groups - )) { + scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) { if (enable_read_trimming) { call fp.fastp as trim after validate { input: read_one_fastq = tuple.left.left, @@ -87,8 +83,14 @@ workflow dnaseq_core_experimental { output_fastq = enable_read_trimming, } } - File chosen_r1_fastq = select_first([trim.read_one_fastq_gz, tuple.left.left]) - File chosen_r2_fastq = select_first([trim.read_two_fastq_gz, tuple.left.right]) + File chosen_r1_fastq = select_first([ + trim.read_one_fastq_gz, + tuple.left.left, + ]) + File chosen_r2_fastq = select_first([ + trim.read_two_fastq_gz, + tuple.left.right, + ]) call util.split_fastq as read_ones after validate { input: fastq = chosen_r1_fastq, @@ -105,11 +107,8 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } @@ -119,17 +118,17 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } } call picard.sort as sort { input: - bam = select_first([bwa_mem.bam, bwa_aln_pe.bam]) + bam = select_first([ + bwa_mem.bam, + bwa_aln_pe.bam, + ]), } } } @@ -146,9 +145,13 @@ workflow dnaseq_core_experimental { output { File harmonized_bam = merge.merged_bam File harmonized_bam_index = index.bam_index - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index c0542c19d..fdf48606b 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! 
- version 1.1 import "../../data_structures/read_group.wdl" @@ -54,9 +53,7 @@ workflow dnaseq_standard_fastq_experimental { Array[File] read_one_fastqs_gz Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String aligner = "mem" @@ -101,12 +98,10 @@ workflow dnaseq_standard_fastq_experimental { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call dnaseq_core_wf.dnaseq_core_experimental after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 7b2e86154..194c18cea 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -55,7 +54,7 @@ workflow dnaseq_standard_experimental { } call parse_input { input: - aligner + aligner, } if (validate_input) { @@ -71,7 +70,10 @@ workflow dnaseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after parse_input { input: bam = selected_bam, @@ -95,7 +97,10 @@ workflow dnaseq_standard_experimental { SM: sample_override, } } - ReadGroup selected_rg = select_first([overriden_rg, rg]) + ReadGroup selected_rg = select_first([ + overriden_rg, + rg, + ]) call read_group.read_group_to_string { input: read_group = selected_rg, format_as_sam_record = true, diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 53c18d64a..9caa5344c 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -4,7 +4,8 @@ import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf +import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" + as xenocp_wf workflow alignment_post { meta { @@ -12,7 +13,7 @@ workflow alignment_post { outputs: { processed_bam: "Input BAM after being transformed by standard processing", bam_index: "BAI index associated with `processed_bam`", - bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", + bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", validate_report: "Validation report produced by `picard ValidateSamFile`. 
Validation warnings and errors are logged.", } allowNestedInputs: true @@ -46,7 +47,9 @@ workflow alignment_post { Boolean use_all_cores = false } - call picard.sort as picard_sort { input: bam } + call picard.sort as picard_sort { input: + bam, + } if (cleanse_xenograft) { call samtools.index as pre_xenocp_index { input: @@ -57,14 +60,23 @@ workflow alignment_post { call xenocp_wf.xenocp { input: input_bam = picard_sort.sorted_bam, input_bai = pre_xenocp_index.bam_index, - reference_tar_gz = select_first([contaminant_db, ""]), - aligner = select_first([xenocp_aligner, "undefined"]), + reference_tar_gz = select_first([ + contaminant_db, + "", + ]), + aligner = select_first([ + xenocp_aligner, + "undefined", + ]), skip_duplicate_marking = true, } } if (mark_duplicates) { call picard.mark_duplicates as picard_markdup { input: - bam = select_first([xenocp.bam, picard_sort.sorted_bam]), + bam = select_first([ + xenocp.bam, + picard_sort.sorted_bam, + ]), } } @@ -79,9 +91,13 @@ workflow alignment_post { use_all_cores, } File aligned_bam_index = samtools_index.bam_index - call picard.validate_bam { input: bam = aligned_bam } + call picard.validate_bam { input: + bam = aligned_bam, + } - call md5sum.compute_checksum { input: file = aligned_bam } + call md5sum.compute_checksum { input: + file = aligned_bam, + } output { File processed_bam = aligned_bam diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index 409ac0133..573db7c7d 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -27,7 +27,9 @@ workflow bam_to_fastqs { Boolean use_all_cores = false } - call samtools.quickcheck { input: bam } + call samtools.quickcheck { input: + bam, + } call samtools.split after quickcheck { input: bam, @@ -42,11 +44,13 @@ workflow bam_to_fastqs { } if (paired_end) { - scatter (reads in - zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz) - ) { + scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz + )) { call fq.fqlint { input: - read_one_fastq = select_first([reads.left, "undefined"]), + read_one_fastq = select_first([ + reads.left, + "undefined", + ]), read_two_fastq = reads.right, } } @@ -54,14 +58,16 @@ workflow bam_to_fastqs { if (!paired_end) { scatter (fq in bam_to_fastq.single_end_reads_fastq_gz) { call fq.fqlint as se_fqlint { input: - read_one_fastq = select_first([fq, "undefined"]), + read_one_fastq = select_first([ + fq, + "undefined", + ]), } } } output { - Array[File] read1s = ( - if paired_end + Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz) else select_all(bam_to_fastq.single_end_reads_fastq_gz) ) diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index a34585c99..fe6825278 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -1,21 +1,20 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/samtools.wdl" workflow samtools_merge { - meta{ + meta { name: "Merge BAMs" description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length" category: "Utility" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } allowNestedInputs: true } - parameter_meta{ + parameter_meta { bams: "BAMs to merge into a final BAM" prefix: "Prefix for output BAM." use_all_cores: "Use all cores? 
Recommended for cloud environments." @@ -31,22 +30,21 @@ workflow samtools_merge { Int bam_length = length(bams) - if (bam_length > max_length){ + if (bam_length > max_length) { # Find the number of merges required - scatter (merge_num in range((bam_length / max_length) + 1)){ + scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams - scatter (bam_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (bam_num in range(max_length)) { + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num ) - if (num < bam_length){ + if (num < bam_length) { File bam_list = bams[num] } } } - scatter (list in bam_list){ + scatter (list in bam_list) { call samtools.merge as inner_merge { input: bams = select_all(list), prefix, @@ -65,7 +63,7 @@ workflow samtools_merge { } } - if (bam_length < max_length){ + if (bam_length < max_length) { call samtools.merge as basic_merge { input: bams, prefix, @@ -76,6 +74,9 @@ workflow samtools_merge { } output { - File merged_bam = select_first([final_merge.merged_bam, basic_merge.merged_bam]) + File merged_bam = select_first([ + final_merge.merged_bam, + basic_merge.merged_bam, + ]) } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 4632d5402..03a06cf6b 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -33,21 +33,20 @@ workflow methylation_cohort { Int beta_length = length(unfiltered_normalized_beta) Int pval_length = length(p_values) - if (beta_length > max_length){ - scatter (merge_num in range((beta_length / max_length) + 1)){ + if (beta_length > max_length) { + scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files - scatter (beta_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (beta_num in range(max_length)) { + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num ) - if (num < beta_length){ + if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } } } - scatter (iter_index in range(length(bam_list))){ + scatter (iter_index in range(length(bam_list))) { call combine_data as inner_merge { input: files_to_combine = select_all(bam_list[iter_index]), combined_file_name = "~{iter_index}.combined.csv", @@ -59,22 +58,21 @@ workflow methylation_cohort { combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { # If p-values are provided, merge those as well - scatter (merge_num in range((pval_length / max_length) + 1)){ + scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files - scatter (pval_num in range(max_length)){ - Int num_p = ( - if merge_num > 0 + scatter (pval_num in range(max_length)) { + Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) else pval_num ) - if (num_p < pval_length){ + if (num_p < pval_length) { File pval_list = p_values[num_p] } } } - scatter (iter_index in range(length(pval_list))){ + scatter (iter_index in range(length(pval_list))) { call combine_data as inner_merge_pvals { input: files_to_combine = select_all(pval_list[iter_index]), combined_file_name = "~{iter_index}.pvals.combined.csv", @@ -88,12 +86,12 @@ workflow methylation_cohort { } } - if (beta_length <= max_length){ + if (beta_length <= max_length) { call combine_data as simple_merge { input: files_to_combine = unfiltered_normalized_beta, 
combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { call combine_data as simple_merge_pval { input: files_to_combine = p_values, combined_file_name = "combined_pvals.csv", @@ -101,23 +99,19 @@ workflow methylation_cohort { } } - File? pval_file = ( - if (pval_length > 0 && !skip_pvalue_check) - then select_first( - [ - final_merge_pvals.combined_file, - simple_merge_pval.combined_file, - ]) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) + then select_first([ + final_merge_pvals.combined_file, + simple_merge_pval.combined_file, + ]) else None ) call filter_probes { input: - beta_values = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ), + beta_values = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]), p_values = pval_file, num_probes, } @@ -131,12 +125,10 @@ workflow methylation_cohort { } output { - File combined_beta = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ) + File combined_beta = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]) File filtered_beta = filter_probes.filtered_beta_values File filtered_probeset = filter_probes.filtered_probes File umap_embedding = generate_umap.umap @@ -149,7 +141,7 @@ task combine_data { meta { description: "Combine data from multiple CSV files by column" outputs: { - combined_file: "Combined CSV file" + combined_file: "Combined CSV file", } } @@ -173,16 +165,19 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GiB") * - if simple_merge then 2 else 1) - + modify_memory_gb - + 2 + Int memory_gb = ceil(size(files_to_combine, "GiB") * if simple_merge + then 2 + else 1 + ) + modify_memory_gb + 2 Int disk_size_gb = ceil(size(files_to_combine, "GiB") * 2) + 2 command <<< python /scripts/methylation/combine.py \ --output-name "~{combined_file_name}" \ - ~{if simple_merge then "--simple-merge" else ""} \ + ~{if simple_merge + then "--simple-merge" + else "" + } \ ~{sep(" ", quote(files_to_combine))} >>> @@ -258,7 +253,7 @@ task generate_umap { meta { description: "Generate UMAP embedding" outputs: { - umap: "UMAP embedding for all samples" + umap: "UMAP embedding for all samples", } } @@ -297,7 +292,7 @@ task plot_umap { meta { description: "Plot UMAP embedding" outputs: { - umap_plot: "UMAP plot for all samples" + umap_plot: "UMAP plot for all samples", } } diff --git a/workflows/methylation/methylation-preprocess.wdl b/workflows/methylation/methylation-preprocess.wdl index 95132cfc8..f85ed8613 100644 --- a/workflows/methylation/methylation-preprocess.wdl +++ b/workflows/methylation/methylation-preprocess.wdl @@ -48,10 +48,8 @@ task process_raw_idats { >>> output { - File beta_swan_norm_unfiltered - = out_base + ".beta_swan_norm_unfiltered.csv" - File beta_swan_norm_unfiltered_genomic - = out_base + ".beta_swan_norm_unfiltered.genomic.csv" + File beta_swan_norm_unfiltered = out_base + ".beta_swan_norm_unfiltered.csv" + File beta_swan_norm_unfiltered_genomic = out_base + ".beta_swan_norm_unfiltered.genomic.csv" File annotation = out_base + ".annotation.csv" File beta_unnorm = out_base + ".beta.csv" File cn_values = out_base + ".cn_values.csv" diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 618d41efb..23be2d06b 100644 --- a/workflows/methylation/methylation-standard.wdl +++ 
b/workflows/methylation/methylation-standard.wdl @@ -32,19 +32,17 @@ workflow methylation { scatter (pair in zip(green_idats, red_idats)) { call preprocess.process_raw_idats { input: - idats = pair + idats = pair, } } call cohort.methylation_cohort { input: - unfiltered_normalized_beta = - process_raw_idats.beta_swan_norm_unfiltered_genomic, + unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, p_values = process_raw_idats.probe_pvalues, } output { - Array[File] beta_swan_norm_unfiltered_genomic = - process_raw_idats.beta_swan_norm_unfiltered_genomic + Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic File combined_beta = methylation_cohort.combined_beta File filtered_beta = methylation_cohort.filtered_beta File filtered_probeset = methylation_cohort.filtered_probeset diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl index 70771d8e8..2e0420fa5 100644 --- a/workflows/qc/markdups-post.wdl +++ b/workflows/qc/markdups-post.wdl @@ -5,7 +5,6 @@ ## whether a read is a duplicate or not. ## But the tasks called below produce different results depending on whether the ## input BAM has been duplicate marked or not. - version 1.1 import "../../tools/mosdepth.wdl" @@ -61,7 +60,7 @@ workflow markdups_post { bam_index = markdups_bam_index, prefix = prefix + "." + "whole_genome", } - scatter(coverage_pair in zip(coverage_beds, coverage_labels)) { + scatter (coverage_pair in zip(coverage_beds, coverage_labels)) { call mosdepth.coverage as regions_coverage { input: bam = markdups_bam, bam_index = markdups_bam_index, @@ -72,8 +71,7 @@ workflow markdups_post { output { File insert_size_metrics = collect_insert_size_metrics.insert_size_metrics - File insert_size_metrics_pdf - = collect_insert_size_metrics.insert_size_metrics_pdf + File insert_size_metrics_pdf = collect_insert_size_metrics.insert_size_metrics_pdf File flagstat_report = flagstat.flagstat_report File mosdepth_global_summary = wg_coverage.summary File mosdepth_global_dist = wg_coverage.global_dist diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 1587b7fda..3762e2883 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -126,8 +126,7 @@ workflow quality_check_standard { File kraken_db File? 
gtf #@ except: LineWidth - File multiqc_config - = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" + File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] Array[String] coverage_labels = [] @@ -164,20 +163,24 @@ workflow quality_check_standard { coverage_labels, } call flag_filter.validate_flag_filter as kraken_filter_validator { input: - flags = standard_filter + flags = standard_filter, } if (run_comparative_kraken) { - call flag_filter.validate_flag_filter - as comparative_kraken_filter_validator - { input: - flags = comparative_filter + call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: + flags = comparative_filter, } } - call md5sum.compute_checksum after parse_input { input: file = bam } + call md5sum.compute_checksum after parse_input { input: + file = bam, + } - call samtools.quickcheck after parse_input { input: bam } - call util.compression_integrity after parse_input { input: bgzipped_file = bam } + call samtools.quickcheck after parse_input { input: + bam, + } + call util.compression_integrity after parse_input { input: + bgzipped_file = bam, + } if (subsample_n_reads > 0) { call samtools.subsample after quickcheck { input: @@ -188,7 +191,10 @@ workflow quality_check_standard { } if (defined(subsample.sampled_bam)) { call samtools.index as subsample_index { input: - bam = select_first([subsample.sampled_bam, "undefined"]), + bam = select_first([ + subsample.sampled_bam, + "undefined", + ]), use_all_cores, } } @@ -203,8 +209,7 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = ( - if (defined(subsample.sampled_bam)) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix ) @@ -235,7 +240,9 @@ workflow quality_check_standard { outfile_name = post_subsample_prefix + ".readlength.tsv", } call ngsderive.encoding after quickcheck { input: - ngs_files = [post_subsample_bam], + ngs_files = [ + post_subsample_bam, + ], outfile_name = post_subsample_prefix + ".encoding.tsv", num_reads = -1, } @@ -249,9 +256,7 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck - after kraken_filter_validator - { input: + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, bitwise_filter = standard_filter, prefix = post_subsample_prefix, @@ -267,14 +272,24 @@ workflow quality_check_standard { } call fq.fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken after fqlint { input: - read_one_fastq_gz - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix, @@ 
-282,23 +297,29 @@ workflow quality_check_standard { } if (run_fastp) { call fp.fastp after fqlint { input: - read_one_fastq - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), output_fastq = false, } } if (run_librarian) { call libraran_tasks.librarian after fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), } } if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck - after comparative_kraken_filter_validator - { input: + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { + input: bam = post_subsample_bam, bitwise_filter = comparative_filter, prefix = post_subsample_prefix + ".alt_filtered", @@ -315,16 +336,24 @@ workflow quality_check_standard { use_all_cores, } call fq.fqlint as alt_filtered_fqlint { input: - read_one_fastq - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: - read_one_fastq_gz - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix + ".alt_filtered", @@ -337,8 +366,8 @@ workflow quality_check_standard { bam_index = post_subsample_bam_index, prefix = post_subsample_prefix + ".whole_genome", } - scatter(coverage_pair in zip(coverage_beds, parse_input.labels)) { - call mosdepth.coverage as regions_coverage after quickcheck { input: + scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) { + call mosdepth.coverage as regions_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, coverage_bed = coverage_pair.left, @@ -350,19 +379,31 @@ workflow quality_check_standard { call ngsderive.junction_annotation after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), prefix = post_subsample_prefix, } call ngsderive.strandedness after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), outfile_name = post_subsample_prefix + ".strandedness.tsv", } call qualimap.rnaseq as qualimap_rnaseq { input: - bam = select_first([bam_to_fastq.collated_bam, "undefined"]), + bam = select_first([ + bam_to_fastq.collated_bam, + "undefined", + ]), prefix = post_subsample_prefix + ".qualimap_rnaseq_results", - gtf = 
select_first([gtf, "undefined"]), + gtf = select_first([ + gtf, + "undefined", + ]), name_sorted = true, paired_end = true, # matches default but prevents user from overriding } @@ -434,17 +475,27 @@ workflow quality_check_standard { ], regions_coverage.summary, select_all(regions_coverage.region_dist), - select_first([markdups_post.mosdepth_region_summary, []]), - select_first([markdups_post.mosdepth_region_dist, []]), - ( - if (mark_duplicates && optical_distance > 0) - then [markdups.mark_duplicates_metrics] + select_first([ + markdups_post.mosdepth_region_summary, + [], + ]), + select_first([ + markdups_post.mosdepth_region_dist, + [], + ]), + (if (mark_duplicates && optical_distance > 0) + then [ + markdups.mark_duplicates_metrics, + ] else [] ), ])) call multiqc_tasks.multiqc { input: - files = flatten([multiqc_files, extra_multiqc_inputs]), + files = flatten([ + multiqc_files, + extra_multiqc_inputs, + ]), config = multiqc_config, report_name = post_subsample_prefix + ".multiqc", } @@ -483,7 +534,10 @@ workflow quality_check_standard { File? kraken_sequences = kraken.sequences File? comparative_kraken_sequences = comparative_kraken.sequences File? junctions = junction_annotation.junctions - Array[File] intermediate_files = select_first([optional_files, []]) + Array[File] intermediate_files = select_first([ + optional_files, + [], + ]) } } @@ -491,7 +545,7 @@ task parse_input { meta { description: "Parses and validates the `quality_check_standard` workflow's provided inputs" outputs: { - labels: "An array of labels to use on the result coverage files associated with each coverage BED" + labels: "An array of labels to use on the result coverage files associated with each coverage BED", } } @@ -539,8 +593,7 @@ task parse_input { >>> output { - Array[String] labels = ( - if (coverage_beds_len > 0) + Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") else [] ) diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl index 3aefef4f1..579385f1c 100644 --- a/workflows/reference/bwa-db-build.wdl +++ b/workflows/reference/bwa-db-build.wdl @@ -40,7 +40,7 @@ workflow bwa_db_build { } output { - File reference_fa = reference_download.downloaded_file - File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz + File reference_fa = reference_download.downloaded_file + File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz } } diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl index 3b4dff835..2cf2e4087 100644 --- a/workflows/reference/gatk-reference.wdl +++ b/workflows/reference/gatk-reference.wdl @@ -88,16 +88,28 @@ workflow gatk_reference { if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) { call util.download as dbsnp_index { input: - url = select_first([dbSNP_vcf_index_url, "undefined"]), - outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]), + url = select_first([ + dbSNP_vcf_index_url, + "undefined", + ]), + outfile_name = select_first([ + dbSNP_vcf_index_name, + "undefined", + ]), disk_size_gb = dbSNP_vcf_index_disk_size_gb, } } if (defined(interval_list_url) && defined(interval_list_name)) { call util.download as intervals { input: - url = select_first([interval_list_url, "undefined"]), - outfile_name = select_first([interval_list_name, "undefined"]), + url = select_first([ + interval_list_url, + "undefined", + ]), + outfile_name = select_first([ + interval_list_name, + "undefined", + ]), disk_size_gb = interval_list_disk_size_gb, } } diff --git 
a/workflows/reference/qc-reference.wdl b/workflows/reference/qc-reference.wdl index 2d64b8901..28f67aebf 100644 --- a/workflows/reference/qc-reference.wdl +++ b/workflows/reference/qc-reference.wdl @@ -121,12 +121,12 @@ workflow qc_reference { } } - if ( - (length(kraken_fastas) > 0) - || (length(kraken_fasta_urls) > 0) - || (length(kraken_libraries) > 0) - ) { - call kraken2.download_taxonomy { input: protein } + if ((length(kraken_fastas) > 0) || (length(kraken_fasta_urls) > 0) || (length( + kraken_libraries + ) > 0)) { + call kraken2.download_taxonomy { input: + protein, + } } scatter (lib in kraken_libraries) { @@ -136,7 +136,10 @@ workflow qc_reference { } } - Array[File] custom_fastas = flatten([kraken_fastas, fastas_download.downloaded_file]) + Array[File] custom_fastas = flatten([ + kraken_fastas, + fastas_download.downloaded_file, + ]) if (length(custom_fastas) > 0) { call kraken2.create_library_from_fastas { input: fastas_gz = custom_fastas, @@ -145,9 +148,13 @@ workflow qc_reference { } Array[File] kraken_tarballs = flatten([ - select_all([download_taxonomy.taxonomy]), + select_all([ + download_taxonomy.taxonomy, + ]), download_library.library, - select_all([create_library_from_fastas.custom_library]), + select_all([ + create_library_from_fastas.custom_library, + ]), ]) if (length(kraken_tarballs) > 0) { call kraken2.build_db as kraken_build_db { input: diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl index d3a99fbe2..d2d14b2a1 100644 --- a/workflows/reference/star-db-build.wdl +++ b/workflows/reference/star-db-build.wdl @@ -56,8 +56,8 @@ workflow star_db_build { } output { - File reference_fa = reference_download.downloaded_file - File gtf = gtf_download.downloaded_file - File star_db_tar_gz = build_star_db.star_db + File reference_fa = reference_download.downloaded_file + File gtf = gtf_download.downloaded_file + File star_db_tar_gz = build_star_db.star_db } } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index b5d088317..afad29046 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -144,9 +144,7 @@ workflow rnaseq_core { GC_AG_and_CT_GC_motif: 5, AT_AC_and_GT_AT_motif: 5, } - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -201,13 +199,11 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = ( - if enable_read_trimming + Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz) else read_one_fastqs_gz ) - Array[File] chosen_r2s = ( - if enable_read_trimming + Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz) else read_two_fastqs_gz ) @@ -252,8 +248,7 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = ( - if (provided_strandedness != "") + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] ) @@ -262,12 +257,11 @@ workflow rnaseq_core { bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam") - + ( - if provided_strandedness == "" - then 
ngsderive_strandedness.strandedness_string - else provided_strandedness - ), + prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness + == "" + then ngsderive_strandedness.strandedness_string + else provided_strandedness + ), pos_sorted = true, } @@ -280,9 +274,13 @@ workflow rnaseq_core { File feature_counts = htseq_count.feature_counts File inferred_strandedness = ngsderive_strandedness.strandedness_file String inferred_strandedness_string = ngsderive_strandedness.strandedness_string - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index dee188b10..29ff3bfc6 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -73,9 +73,7 @@ workflow rnaseq_standard_fastq { Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups File? contaminant_db - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -100,7 +98,7 @@ workflow rnaseq_standard_fastq { } } - if (validate_input){ + if (validate_input) { scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint after parse_input { input: read_one_fastq = reads.left, @@ -123,12 +121,10 @@ workflow rnaseq_standard_fastq { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call rnaseq_core_wf.rnaseq_core after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index c7278a72a..48f70ab95 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -92,7 +92,10 @@ workflow rnaseq_standard { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl index 8df2e61a1..ab7a8a445 100644 --- a/workflows/rnaseq/rnaseq-variant-calling.wdl +++ b/workflows/rnaseq/rnaseq-variant-calling.wdl @@ -54,7 +54,7 @@ workflow rnaseq_variant_calling { Int scatter_count = 6 } - if (!bam_is_dup_marked){ + if (!bam_is_dup_marked) { call picard.mark_duplicates { input: bam, create_bam = true, @@ -62,8 +62,14 @@ workflow rnaseq_variant_calling { } call gatk.split_n_cigar_reads { input: - bam = select_first([mark_duplicates.duplicate_marked_bam, bam]), - bam_index = select_first([mark_duplicates.duplicate_marked_bam_index, bam_index]), + bam = select_first([ + mark_duplicates.duplicate_marked_bam, + 
bam, + ]), + bam_index = select_first([ + mark_duplicates.duplicate_marked_bam_index, + bam_index, + ]), fasta, fasta_index, dict, From 2805d4c154e6b2c325efd73667b06e152bc8e78d Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Fri, 6 Feb 2026 11:23:59 -0500 Subject: [PATCH 33/47] docs: revise best practices and contributing guide (#293) _Describe the problem or feature in addition to a link to the issues._ closes #277 Before submitting this PR, please make sure: - [ ] You have added a few sentences describing the PR here. - [ ] The code passes all CI tests without any errors or warnings. - [ ] You have added tests (when appropriate). - [ ] You have added an entry in any relevant CHANGELOGs (when appropriate). - [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly! - [ ] You have updated the README or other documentation to account for these changes (when appropriate). --------- Co-authored-by: Andrew Thrasher --- CONTRIBUTING.md | 44 ++++++++++++++++++++++ best-practices.md | 93 ++++++++++++++--------------------------------- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 81fc14d24..706a0a221 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,6 +31,50 @@ Our pull request template has an extensive checklist that must be completed prio Note that the maintainers reserve the right to close any submission without review for any reason. +## Expectations for WDL contributions + +We have some opinionated rules and guidelines we use while writing WDL for this repository. These include: + +- See `template/common-parameter-meta.txt` for common description strings. + - If applicable, use the same parameter name and help text as the underlying tool called by the task. +- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations. + - See the various tasks in the template directory for possible ways to allocate resources. + - Contributors can mix and match the available templates, copy and pasting subsections as appropriate. + - A task may contain both statically and dynamically allocated resources. +- Multi-core tasks should *always* follow the conventions laid out in the `use_all_cores_task` example (see `template/task-examples.wdl`). + - This is catering to cloud users, who may be allocated a machine with more cores than are specified by the `ncpu` parameter. + - Note that future versions of WDL will likely cause a change to this convention. + - We plan to deprecate the `ncpu` param in favor of accessing the runtime section directly (`n_cores=~{task.runtime.cpu}`). +- Output file names should *always* be determined with either the `outfile_name` parameter or the `prefix` parameter. + - `outfile_name` should be preferred if no downstream tasks/tools rely on the file name/extension. + - Tasks with multiple outputs should always use the `prefix` convention. +- After the input sorting rules in `sprocket lint` have been applied, follow the below rules for further sorting. + - "sample" files come before "reference" files. + - If present, `use_all_cores` should be the last `Boolean` in its block. + - The `ncpu` parameter comes before inputs that allocate memory, which come before inputs that allocate disk space. + - This block of 2-3 inputs should come after all other inputs. +- If a task uses multiple cores or is multithreaded, then at least 2 cpu should be specified. 
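+  - As a rough sketch of the multi-core conventions above (the task name, tool invocation, and image pin are hypothetical placeholders; `template/task-examples.wdl` remains the canonical reference):
+
+    ```wdl
+    version 1.1
+
+    task multi_core_sketch {
+        input {
+            File bam
+            Boolean use_all_cores = false
+            Int ncpu = 2  # multithreaded, so request at least 2 CPUs
+        }
+
+        command <<<
+            set -euo pipefail
+
+            n_cores=~{ncpu}
+            if ~{use_all_cores}; then
+                # Cloud backends may allocate more cores than `ncpu` requests.
+                n_cores=$(nproc)
+            fi
+
+            samtools sort --threads "$n_cores" -o sorted.bam "~{bam}"
+        >>>
+
+        output {
+            File sorted_bam = "sorted.bam"
+        }
+
+        runtime {
+            cpu: ncpu
+            docker: "quay.io/biocontainers/samtools:1.19--h50ea8bc_0"  # illustrative pin
+            maxRetries: 1
+        }
+    }
+    ```
+
+    Only the `n_cores` plumbing is the convention being illustrated here; the `samtools sort` payload is stand-in work.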
+- Use the `as` keyword sparingly; only in the case of increased readability or to avoid name collisions.
+  - Prefer using `as` in the import block rather than at the task/workflow call level.
+  - When using `as` to rename an invalid URI, attempt to make as few changes to the filename as possible (i.e. try not to abbreviate).
+  - To disambiguate a task or workflow file from its contents, you can respectively add the `_tasks` or `_wf` suffix in the import section.
+- Whenever possible, prefer a Docker image maintained by an external source (such as BioContainers) rather than creating your own image.
+- When adding a `Dockerfile` to this repository, follow the below conventions:
+  - Create a directory under the `docker/` directory and choose an appropriate name (likely shared with the underlying tool). The `Dockerfile` should be nested under this new directory. Then create a `package.json` alongside the `Dockerfile`. The `package.json` file is required to contain two JSON fields (`name` and `version`). It can optionally contain a `revision` field.
+  - Docker images should be versioned according to the following convention:
+    - The `version` should be shared with whatever underlying tool is being used
+    - If no specific tool is named (e.g. the `util` image), default to SemVer. Ignore the next 3 bullet points.
+    - The revision should start with zero (`0`).
+    - If the Docker image gets updated, *without* updating the base tool's version, increment the number by one.
+    - If the Docker image gets updated, *including* updating the base tool's version, revert back to zero.
+- Any tasks which are deprecated should have a `deprecated: true` key in their `meta` section.
+  - Never include a `deprecated: false` key in any production tasks. All tasks are assumed to not be deprecated unless otherwise noted.
+  - In addition, there should be a `warning` key which starts with the text `**[DEPRECATED]**`.
+    - No other text or explanation is required after the above text, but it can be added for further context.
+  - These two conventions allow for a task's deprecated status to be communicated in multiple ways, ensuring no user misses the notice.
+  - Deprecated tasks should be placed at the end of their file.
+- While WDL allows embedded scripts in the `command` block sections, this repository requires scripts (e.g. R, Python) to be separate and placed in the `scripts` folder. The relevant Docker image build for your task should then include the script during the build so the task can access it. This separation of concerns improves the developer experience by improving syntax highlighting in the WDL document and enabling linting and formatting checks for the scripting languages.

## FAQs

### Can I use Artificial Intelligence (AI)?

diff --git a/best-practices.md b/best-practices.md
index 5a9df8072..38d6cb005 100644
--- a/best-practices.md
+++ b/best-practices.md
@@ -1,69 +1,30 @@
 # WDL Best Practices

-All rules below should be followed by contributors to this repo. Contributors should also follow the rules enforced by [Sprocket](https://sprocket.bio/). Pull Requests which do not conform to these specifications will be asked to change.
-
-## Rules
-
-- All WDL should be written in v1.1+
 - All tasks with multiple commands (including any pipes (`|`)) should have `set -euo pipefail` before any other commands.
-- All tasks should run in a persistently versioned container
-  - This ensures reproducibility across time and environments
-- See `template/common-parameter-meta.txt` for common description strings.
- - If applicable, use the same parameter name, help string, and parameter ordering as the underlying tool called by the task -- Check all assumptions made about workflow inputs before beginning long running executions - - Common examples of assumptions that should be checked: valid `String` choice, mutually exclusive parameters, missing optional file for selected parameters, filename extensions - - This can commonly be handled by a `parse_input` task (defined in the same file as the workflow in question) - - When possible, avoid passing in entire files to the `parse_input` task. Coerce files to `Boolean`s or `String`s to avoid unnecessary disk space usage -- Tasks with string parameters for which a limited number of choices are valid, must be documented following the template in `string_choices_task` (see `template/task-examples.wdl`) - - they should also fail quickly with an informative error message if an invalid input is provided - - In most cases, just passing the parameter to the underlying tool should produce a satisfactory error, but this must be checked for each tool - - While redundant, it is still best practice to validate these strings in the `parse_input` task of any workflow which calls the task - - This ensures the workflow will fail as fast as possible to save users time and resources -- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations - - see the various tasks in the template directory for possible ways to allocate resources - - Contributors can mix and match the available templates, copy and pasting subsections as appropriate - - It is allowed to have one resource allocated dynamically, and another allocated statically in the same task. -- multi-core tasks should *always* follow the conventions laid out in the `use_all_cores_task` example (see `template/task-examples.wdl`) - - this is catering to cloud users, who may be allocated a machine with more cores than are specified by the `ncpu` parameter - - Note that future versions of WDL will likely cause a change to this convention. - - We plan to deprecate the `ncpu` param in favor of accessing the runtime section directly (`n_cores=~{task.runtime.cpu}`) -- Tasks which assume a file and any accessory files (e.g. a BAM and a BAI) have specific extensions and/or are in the same directory should *always* follow the conventions laid out in the `localize_files_task` example (see `template/task-examples.wdl`) - - This is to accommodate as many backends as possible -- output file names should *always* be determined with either the `outfile_name` parameter or the `prefix` parameter. - - `outfile_name` should be preferred if no downstream tasks/tools rely on the file name/extension - - tasks with multiple outputs should always use the `prefix` convention -- After the input sorting rules in `sprocket lint` have been applied, follow the below rules for further sorting. - - "sample" files come before "reference" files - - If present, `use_all_cores` should be the last `Boolean` in its block - - the `ncpu` parameter comes before inputs that allocate memory, which come before inputs that allocate disk space - - This block of 2-3 inputs should come after all other inputs. -- Most tasks should have a default `maxRetries` of 1 - - Certain tasks are prone to intermittent failure (often if an internet connection is involved) and can have a higher default `maxRetries`. -- If a task uses multiple cores or is multithreaded, then at least 2 cpu should be specified. 
-- Use the `as` keyword sparingly; only in the case of increased readability or to avoid name collisions - - Prefer using `as` in the import block rather than at the task/workflow call level - - When using `as` to rename an invalid URI, attempt to make as few changes to the filename as possible (i.e. try not to abbreviate) - - To disambiguate a task or workflow file from it's contents, you can respectively add the `_tasks` or `_wf` suffix in the import section -- the non-empty qualifier (`+`) of arrays and maps should be avoided -- Whenever possible, prefer a Docker image maintained by an external source (such as BioContainers) rather than creating your own image -- When adding a Dockerfile to this repository, follow the below conventions - - Create a directory under the `docker/` directory and choose an appropriate name (likely shared with the underlying tool). The `Dockerfile` should be nested under this new directory. Then create a `package.json` alongside the `Dockerfile`. The `package.json` file is required to contain two JSON fields (`name` and `version`). It can optionally contain a `revision` field. - - Docker images should be versioned according to the following convention - - The `version` should be shared with whatever underlying tool is being used - - If no specific tool is named (e.g. the `util` image), default to SemVer. Ignore the next 3 bullet points. - - The revision should start with zero (`0`) - - If the Docker image gets updated, *without* updating the base tool's version, increment the number by one - - If the Docker image gets updated, *including* updating the base tool's version, revert back to zero -- general purpose tasks can use the `util` image maintained in this repo -- The `description` key in WDL meta sections should be in active voice, beginning the first sentence with a verb - - Each task/workflow is _doing_ something. The first sentence should be a succinct description of what that "something" is. - - The `description` key should be succinct. Generally, one sentence shorter than 140 characters is appropriate. -- If documenting a workflow, task, input, or output and you need to be more verbose than is appropriate in a `description` field, you may include _in addition_ a `help` key with extended prose or an `external_help` key with a URL - - the presence of `help` or `external_help` is _not_ a substitute for a `description` -- Any tasks which are deprecated should have a `deprecated: true` key in their `meta` section - - It is allowed (but redundant and discouraged) to include a `deprecated: false` key in any production tasks. All tasks are assumed to not be deprecated unless otherwise noted. - - In addition, there should be a `warning` key which starts with the text `**[DEPRECATED]**` - - No other text or explanation is required after the above text, but it can be added for further context - - These two rules allow for a task's deprecated status to be communicated in multiple ways, ensuring no user misses the notice - - Deprecated tasks should be placed at the end of their file -- While WDL allows embedded scripts in the `command` block sections, this repository requires scripts (e.g. R, Python) to be separate and placed in the `scripts` folder. The relevant Docker image build for your task should then include the script during the build so the task can access it. This separation of concerns improves the developer experience by improving syntax highlighting in the WDL document and enabling linting and formatting checks for the scripting languages. 
\ No newline at end of file
+  - Tasks without multiple commands or pipes can omit this.
+  - These options will cause common classes of bugs in Bash scripts to fail immediately and loudly, instead of causing silent or subtle bugs in your task behavior.
+- All tasks should run in a persistently versioned container.
+  - e.g. do not use `latest` tags for Docker images.
+  - This helps ensure reproducibility across time and environments.
+- Check all assumptions made about workflow inputs before beginning long-running executions.
+  - Common examples of assumptions that should be checked:
+    - valid `String` choice (for WDL 1.3 and later, an `enum` should be used in place of `String`s with a fixed set of valid options)
+    - mutually exclusive parameters
+    - missing optional file for selected parameters
+    - filename extensions
+  - Use `after` clauses in workflows to ensure that all these assumptions are valid before beginning tasks with heavy computation.
+- If the _contents_ of a `File` are not read or do not need to be localized for a task, try to coerce the `File` variable to a `Boolean` (with `defined()`) or a `String` (with `basename()`) to avoid unnecessary disk space usage and networking.
+- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations.
+  - Often, tasks have a dynamic calculation for resource requirements based on input sizes. Users of a WDL should have an easy way to fine-tune this calculation.
+    - This may mean incorporating an `Int` or `Float` in the inputs of the task that is applied to the dynamic calculation.
+  - For WDL 1.3 and later, WDL authors can change resource requirements between retry attempts. This enables mitigation of errors relating to resource limits, but users may inadvertently disable these mitigations by introducing runtime overrides. WDL authors should expose resource fine-tuning via the input section and incorporate those user values in any dynamic calculations to prevent runtime locking.
+- Tasks which assume a file and any accessory files (e.g. a BAM and a BAI) have specific extensions and/or are in the same directory should *always* create symlinks from the mounted inputs to the work directory of the task.
+  - This is because individual `File` inputs are not guaranteed to be in the same mounted directory.
+  - The `command` may include something like: `ln -s "~{}" "./"`
+- Tasks should `rm` any temporary or intermediate files created in the work directory (including symlinks).
+  - This helps reduce disk bloat from keeping unnecessary files around.
+  - This is especially important for any large or uncompressed files, such as reference FASTAs or databases.
+- Most tasks should have a default `maxRetries` of 1.
+  - This is because many WDL backends are prone to intermittent failures that can be recovered from with a retry.
+  - Certain tasks are especially prone to intermittent failure (often if any networking is involved) and can have a higher default `maxRetries`.
+  - Certain tasks with potentially high compute costs in cloud environments may default to `0`. This should be used in combination with call caching to aid rerunning while minimizing costs.

From c958f35c8f4b9778ab1a0e5282587819ab751338 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher
Date: Fri, 6 Feb 2026 16:15:39 -0500
Subject: [PATCH 34/47] chore: add WDL-specific instructions for copilot (#295)

Adding an initial set of instructions for Copilot specific to WDL and our internal best practices.
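As a concrete reference for reviewers, here is a minimal task sketch consistent with several of the conventions these instructions point to (strict Bash options, symlinked accessory files, a user-tunable dynamic disk allocation, a pinned container, and a default `maxRetries` of 1). It is an illustration only: the task name, tool choice, and container tag are assumptions for this sketch, not excerpts from the repository templates.

```wdl
version 1.1

task idxstats {
    meta {
        description: "Runs `samtools idxstats` on the input BAM (illustrative sketch only)"
    }

    parameter_meta {
        bam: "Input BAM format file to run `samtools idxstats` on"
        bam_index: "BAM index file corresponding to the input BAM"
        modify_disk_size_gb: "Add to or subtract from the dynamic disk space allocation"
    }

    input {
        File bam
        File bam_index
        Int modify_disk_size_gb = 0
    }

    Float bam_size = size(bam, "GiB")
    # dynamic disk allocation that users can fine-tune via `modify_disk_size_gb`
    Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb

    command <<<
        # fail immediately and loudly on common classes of Bash bugs
        set -euo pipefail

        # symlink the BAM and its index into the work directory, since
        # individual `File` inputs are not guaranteed to share a mounted directory
        ln -s "~{bam}" "~{basename(bam)}"
        ln -s "~{bam_index}" "~{basename(bam_index)}"

        samtools idxstats "~{basename(bam)}" > "idxstats.txt"

        # remove the symlinks to reduce disk bloat
        rm "~{basename(bam)}" "~{basename(bam_index)}"
    >>>

    output {
        File idxstats_report = "idxstats.txt"
    }

    runtime {
        memory: "4 GB"
        disks: "~{disk_size_gb} GB"
        # persistently versioned container; never a `latest` tag
        container: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_0"
        maxRetries: 1
    }
}
```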
Before submitting this PR, please make sure:

- [x] You have added a few sentences describing the PR here.
- [ ] The code passes all CI tests without any errors or warnings.
- [ ] You have added tests (when appropriate).
- [ ] You have added an entry in any relevant CHANGELOGs (when appropriate).
- [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly!
- [ ] You have updated the README or other documentation to account for these changes (when appropriate).
---
 .github/instructions/wdl.instructions.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .github/instructions/wdl.instructions.md

diff --git a/.github/instructions/wdl.instructions.md b/.github/instructions/wdl.instructions.md
new file mode 100644
index 000000000..0f3fbef0a
--- /dev/null
+++ b/.github/instructions/wdl.instructions.md
@@ -0,0 +1,13 @@
+---
+applyTo: "**/*.wdl"
+---
+# This file contains instructions for the WDL code style in the workflows directory.
+
+# WDL Development Instructions
+
+- Use the latest version of the WDL spec from https://github.com/openwdl/wdl/
+- Use the CONTRIBUTING.md guide in this repository for general coding style and best practices.
+- Use the best-practices.md guide in this repository for WDL-specific best practices.
+- Prefer the strings in template/common-parameter-meta.txt when writing parameter meta sections.
+- Use the templates in template/task-examples.wdl when writing new tasks.
+- All WDL code should be checked with Sprocket lint and formatted with Sprocket format before committing.

From 4a7fa73dddfe143e2e874853ad37c384866d4741 Mon Sep 17 00:00:00 2001
From: Kevin Benton <1820709+kevin-benton@users.noreply.github.com>
Date: Mon, 9 Feb 2026 12:47:29 -0600
Subject: [PATCH 35/47] chore(Snyk): Security upgrade ubuntu from
 noble-20250925 to noble-20260113 (#288)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

![snyk-top-banner](https://res.cloudinary.com/snyk/image/upload/r-d/scm-platform/snyk-pull-requests/pr-banner-default.svg)

### Snyk has created this PR to fix 2 vulnerabilities in the dockerfile dependencies of this project.

Keeping your Docker base image up-to-date means you’ll benefit from security fixes in the latest version of your chosen image.

#### Snyk changed the following file(s):

- `docker/util/Dockerfile`

We recommend upgrading to `ubuntu:24.04`, as this image has only **8** known vulnerabilities. To do this, merge this pull request, then verify your application still works as expected.

#### Vulnerabilities that will be fixed with an upgrade:

|  | Issue | Score |
| :---: | :--- | :--- |
| ![high severity](https://res.cloudinary.com/snyk/image/upload/w_20,h_20/v1561977819/icon/h.png 'high severity') | Out-of-bounds Write<br/>[SNYK-UBUNTU2404-GNUPG2-14849555](https://snyk.io/vuln/SNYK-UBUNTU2404-GNUPG2-14849555) | **281** |
| ![medium severity](https://res.cloudinary.com/snyk/image/upload/w_20,h_20/v1561977819/icon/m.png 'medium severity') | Directory Traversal<br/>[SNYK-UBUNTU2404-PAM-11936905](https://snyk.io/vuln/SNYK-UBUNTU2404-PAM-11936905) | **231** |

---

> [!IMPORTANT]
>
> - Check the changes in this PR to ensure they won't cause issues with your project.
> - Max score is 1000. Note that the real score may have changed since the PR was raised.
> - This PR was automatically created by Snyk using the credentials of a real user.

---

**Note:** _You are seeing this because you or someone else with access to this repository has authorized Snyk to open fix PRs._

For more information:
🧐 [View latest project report](https://app.snyk.io/org/compbio/project/c80293d7-c0f4-4d94-b297-efe1fc5caf56?utm_source=github&utm_medium=referral&page=fix-pr)
📜 [Customise PR templates](https://docs.snyk.io/scan-using-snyk/pull-requests/snyk-fix-pull-or-merge-requests/customize-pr-templates?utm_source=github&utm_content=fix-pr-template)
🛠 [Adjust project settings](https://app.snyk.io/org/compbio/project/c80293d7-c0f4-4d94-b297-efe1fc5caf56?utm_source=github&utm_medium=referral&page=fix-pr/settings)
📚 [Read about Snyk's upgrade logic](https://docs.snyk.io/scan-with-snyk/snyk-open-source/manage-vulnerabilities/upgrade-package-versions-to-fix-vulnerabilities?utm_source=github&utm_content=fix-pr-template)

---

**Learn how to fix vulnerabilities with free interactive lessons:**

🦉 [Out-of-bounds Write](https://learn.snyk.io/lesson/out-of-bounds-write/?loc=fix-pr)
🦉 [Directory Traversal](https://learn.snyk.io/lesson/directory-traversal/?loc=fix-pr)

---------

Co-authored-by: snyk-bot
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: adthrasher <1165729+adthrasher@users.noreply.github.com>
Co-authored-by: Andrew Thrasher
---
 data_structures/flag_filter.wdl              |  2 +-
 data_structures/read_group.wdl               |  6 +++---
 docker/python-plotting/package.json          |  2 +-
 docker/umap/Dockerfile                       | 12 ++++++------
 docker/umap/package.json                     |  2 +-
 docker/util/Dockerfile                       |  2 +-
 docker/util/package.json                     |  2 +-
 tools/htseq.wdl                              |  2 +-
 tools/md5sum.wdl                             |  2 +-
 tools/util.wdl                               | 14 +++++++-------
 workflows/dnaseq/dnaseq-standard.wdl         |  2 +-
 workflows/methylation/methylation-cohort.wdl |  4 ++--
 workflows/qc/quality-check-standard.wdl      |  2 +-
 workflows/rnaseq/rnaseq-standard.wdl
| 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 263b7f456..e57326359 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -106,7 +106,7 @@ task validate_string_is_12bit_int { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index df81982d6..b06ba639b 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -138,7 +138,7 @@ task get_read_groups { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -363,7 +363,7 @@ task validate_read_group { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -421,7 +421,7 @@ task inner_read_group_to_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/docker/python-plotting/package.json b/docker/python-plotting/package.json index 9ca1275fa..1fc0df903 100644 --- a/docker/python-plotting/package.json +++ b/docker/python-plotting/package.json @@ -1,4 +1,4 @@ { "name": "python-plotting", - "version": "2.0.7" + "version": "2.0.8" } \ No newline at end of file diff --git a/docker/umap/Dockerfile b/docker/umap/Dockerfile index 9afd846b3..452d55829 100644 --- a/docker/umap/Dockerfile +++ b/docker/umap/Dockerfile @@ -1,10 +1,10 @@ -FROM python:3.13.1-alpine +FROM python:3.14.2-alpine -# Downstream requires llvm15, but that is out-of-date -# llvm15 installs with the -15 suffix, so we need to symlink -RUN apk add --no-cache build-base llvm15-dev bash \ - && ln -s /usr/bin/llvm-config-15 /usr/bin/llvm-config +# Downstream requires llvm20, but that is out-of-date +# llvm20 installs with the -20 suffix, so we need to symlink +RUN apk add --no-cache build-base llvm20-dev bash cmake llvm20 llvm20-libs llvm20-static llvm20-gtest \ + && ln -s /usr/bin/llvm20-config /usr/bin/llvm-config -RUN pip install umap-learn==0.5.7 pandas +RUN pip install umap-learn==0.5.11 pandas COPY --from=scripts --chmod=777 methylation/generate_umap.py /scripts/methylation/generate_umap.py \ No newline at end of file diff --git a/docker/umap/package.json b/docker/umap/package.json index fc6ab1a0d..71cfaf371 100644 --- a/docker/umap/package.json +++ b/docker/umap/package.json @@ -1,5 +1,5 @@ { "name": "umap", "version": "0.5.7", - "revision": "10" + "revision": "11" } \ No newline at end of file diff --git a/docker/util/Dockerfile b/docker/util/Dockerfile index ef7287a87..ed5fbbe42 100644 --- a/docker/util/Dockerfile +++ b/docker/util/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:noble-20250925 +FROM ubuntu:noble-20260113 RUN apt-get update \ && apt-get upgrade -y \ diff --git a/docker/util/package.json b/docker/util/package.json index 252cb57ef..a30115713 100644 --- a/docker/util/package.json +++ b/docker/util/package.json @@ -1,4 +1,4 @@ { "name": "util", - "version": "3.0.1" + "version": "3.0.2" } \ No newline at end of file diff --git a/tools/htseq.wdl b/tools/htseq.wdl index cb8b16014..18ff0e0da 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -195,7 +195,7 @@ task calc_tpm { runtime { memory: "4 GB" disks: "10 GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git 
a/tools/md5sum.wdl b/tools/md5sum.wdl index 1e79260a0..c450d9282 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -34,7 +34,7 @@ task compute_checksum { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/tools/util.wdl b/tools/util.wdl index be141df2c..fd0f35263 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -40,7 +40,7 @@ task download { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -79,7 +79,7 @@ task split_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -127,7 +127,7 @@ task calc_feature_lengths { runtime { memory: "16 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -246,7 +246,7 @@ task unpack_tarball { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -353,7 +353,7 @@ task global_phred_scores { runtime { memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -402,7 +402,7 @@ task check_fastq_and_rg_concordance { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -462,7 +462,7 @@ task split_fastq { cpu: ncpu memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 194c18cea..b4608c891 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -163,7 +163,7 @@ task parse_input { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 03a06cf6b..cb5f39c8a 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -280,7 +280,7 @@ task generate_umap { } runtime { - container: "ghcr.io/stjudecloud/umap:0.5.7-10" + container: "ghcr.io/stjudecloud/umap:0.5.7-11" memory: "8 GB" cpu: 1 disks: "~{disk_size_gb} GB" @@ -320,7 +320,7 @@ task plot_umap { cpu: 1 memory: "4 GB" disks: "4 GB" - container: "ghcr.io/stjudecloud/python-plotting:2.0.7" + container: "ghcr.io/stjudecloud/python-plotting:2.0.8" maxRetries: 1 } } diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 3762e2883..062216b2d 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -600,7 +600,7 @@ task parse_input { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index 48f70ab95..7db5e205e 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -184,7 +184,7 @@ task parse_input { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } From 
90573e087c5568d3dd3313f7cf60b52c1df2e4b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:22:16 -0500 Subject: [PATCH 36/47] chore(deps): bump urllib3 from 2.5.0 to 2.6.3 (#291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.3.
Release notes

Sourced from urllib3's releases.

2.6.3

🚀 urllib3 is fundraising for HTTP/2 support

urllib3 is raising ~$40,000 USD to release HTTP/2 support and ensure long-term sustainable maintenance of the project after a sharp decline in financial support. If your company or organization uses Python and would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and thousands of other projects please consider contributing financially to ensure HTTP/2 support is developed sustainably and maintained for the long-haul.

Thank you for your support.

Changes

  • Fixed a security issue where decompression-bomb safeguards of the streaming API were bypassed when HTTP redirects were followed. (CVE-2026-21441 reported by @​D47A, 8.9 High, GHSA-38jv-5279-wg99)
  • Started treating Retry-After times greater than 6 hours as 6 hours by default. (urllib3/urllib3#3743)
  • Fixed urllib3.connection.VerifiedHTTPSConnection on Emscripten. (urllib3/urllib3#3752)

2.6.2

Changes

  • Fixed HTTPResponse.read_chunked() to properly handle leftover data in the decoder's buffer when reading compressed chunked responses. (urllib3/urllib3#3734)

2.6.1

Changes

  • Restore previously removed HTTPResponse.getheaders() and HTTPResponse.getheader() methods. (#3731)

2.6.0

Security

  • Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (CVE-2025-66471 reported by @​Cycloctane, 8.9 High, GHSA-2xpw-w6gg-jr37)
  • Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the Content-Encoding header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (CVE-2025-66418 reported by @​illia-v, 8.9 High, GHSA-gm62-xv2j-4w53)

[!IMPORTANT]

  • If urllib3 is not installed with the optional urllib3[brotli] extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using urllib3[brotli] to install a compatible Brotli package automatically.

... (truncated)

Changelog

Sourced from urllib3's changelog.

2.6.3 (2026-01-07)

  • Fixed a high-severity security issue where decompression-bomb safeguards of the streaming API were bypassed when HTTP redirects were followed. (GHSA-38jv-5279-wg99 <https://github.com/urllib3/urllib3/security/advisories/GHSA-38jv-5279-wg99>__)
  • Started treating Retry-After times greater than 6 hours as 6 hours by default. ([#3743](https://github.com/urllib3/urllib3/issues/3743) <https://github.com/urllib3/urllib3/issues/3743>__)
  • Fixed urllib3.connection.VerifiedHTTPSConnection on Emscripten. ([#3752](https://github.com/urllib3/urllib3/issues/3752) <https://github.com/urllib3/urllib3/issues/3752>__)

2.6.2 (2025-12-11)

  • Fixed HTTPResponse.read_chunked() to properly handle leftover data in the decoder's buffer when reading compressed chunked responses. ([#3734](https://github.com/urllib3/urllib3/issues/3734) <https://github.com/urllib3/urllib3/issues/3734>__)

2.6.1 (2025-12-08)

  • Restore previously removed HTTPResponse.getheaders() and HTTPResponse.getheader() methods. ([#3731](https://github.com/urllib3/urllib3/issues/3731) <https://github.com/urllib3/urllib3/issues/3731>__)

2.6.0 (2025-12-05)

Security

  • Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (GHSA-2xpw-w6gg-jr37 <https://github.com/urllib3/urllib3/security/advisories/GHSA-2xpw-w6gg-jr37>__)
  • Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the Content-Encoding header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (GHSA-gm62-xv2j-4w53 <https://github.com/urllib3/urllib3/security/advisories/GHSA-gm62-xv2j-4w53>__)

.. caution::

  • If urllib3 is not installed with the optional urllib3[brotli] extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using

... (truncated)

Commits
  • 0248277 Release 2.6.3
  • 8864ac4 Merge commit from fork
  • 70cecb2 Fix Scorecard issues related to vulnerable dev dependencies (#3755)
  • 41f249a Move "v2.0 Migration Guide" to the end of the table of contents (#3747)
  • fd4dffd Patch VerifiedHTTPSConnection for Emscripten (#3752)
  • 13f0bfd Handle massive values in Retry-After when calculating time to sleep for (#3743)
  • 8c480bf Bump actions/upload-artifact from 5.0.0 to 6.0.0 (#3748)
  • 4b40616 Bump actions/cache from 4.3.0 to 5.0.1 (#3750)
  • 82b8479 Bump actions/download-artifact from 6.0.0 to 7.0.0 (#3749)
  • 34284cb Mention experimental features in the security policy (#3746)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=urllib3&package-manager=uv&previous-version=2.5.0&new-version=2.6.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/uv.lock b/uv.lock
index 5d61e8384..ba7164f2e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -543,11 +543,11 @@ wheels = [

 [[package]]
 name = "urllib3"
-version = "2.5.0"
+version = "2.6.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
 ]

 [[package]]

From 79aa181a13360999aead4a82529ff709feac512e Mon Sep 17 00:00:00 2001
From: Ari Frantz
Date: Mon, 16 Feb 2026 09:27:04 -0500
Subject: [PATCH 37/47] feat: gzip picard validate STDOUT (#298)

_Describe the problem or feature in addition to a link to the issues._

The non-summary mode of `validate_bam` can output multiple lines _per read_ in the input BAM, using more disk than the input file. This passes that output through gzip (a simplified sketch of the pattern follows the checklist below).

This also removes the x2 factor on the disk size allocation for this task. We may instead want to keep that x2 factor, but only if we're in full report mode?

Before submitting this PR, please make sure:

- [ ] You have added a few sentences describing the PR here.
- [ ] The code passes all CI tests without any errors or warnings.
- [ ] You have added tests (when appropriate).
- [ ] You have added an entry in any relevant CHANGELOGs (when appropriate).
- [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly!
- [ ] You have updated the README or other documentation to account for these changes (when appropriate).
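Sketched in isolation, the change amounts to the pattern below. This is a simplified, hypothetical `validate_sketch` task, not the real `validate_bam`: the actual argument list and the exit-code handling (`|| rc=$?` and the error grep) are elided, and only the conditional gzip of the report is shown.

```wdl
version 1.1

task validate_sketch {
    input {
        File bam
        String outfile_name = basename(bam, ".bam") + ".validate.txt"
        Boolean summary_mode = false
    }

    # compress the full report, which can otherwise exceed the input BAM in size;
    # summary-mode reports are small and are left uncompressed
    String outfile = if summary_mode then outfile_name else outfile_name + ".gz"

    command <<<
        picard ValidateSamFile \
            -I "~{bam}" \
            ~{if summary_mode then "--MODE SUMMARY" else ""} \
            ~{if !summary_mode then "| gzip" else ""} \
            > "~{outfile}"
    >>>

    output {
        File validate_report = outfile
    }

    runtime {
        container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0"
        maxRetries: 1
    }
}
```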
--------- Co-authored-by: Andrew Thrasher --- .gitignore | 3 ++- tools/CHANGELOG.md | 7 ++++++ tools/picard.wdl | 63 ++++++++++++++++++++++------------------------ 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 2bab65b34..ac07b2a2c 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ pytest/ **/womtool* *.jar runs/ +out/ miniwdl_call_cache/ miniwdl_singularity_cache/ _LAST @@ -30,7 +31,7 @@ _LAST results*/ output*/ -# Ignore JSONs in the root (probably being used for input to sprocket run) +# Ignore JSONs in the root (probably being used for input) /*.json # Ignore `sprocket doc` dir diff --git a/tools/CHANGELOG.md b/tools/CHANGELOG.md index c57ce6972..12563cd4a 100644 --- a/tools/CHANGELOG.md +++ b/tools/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/). +## 2026 February + +### Changed + +- gzip `picard.validate_bam` output when not in `summary_mode` [#298](https://github.com/stjudecloud/workflows/pull/298) +- Lowered default disk allocation for `picard.validate_bam` [#298](https://github.com/stjudecloud/workflows/pull/298) + ## 2026 January ### Changed diff --git a/tools/picard.wdl b/tools/picard.wdl index 3c8f7d4ce..22570d083 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,4 +1,5 @@ ## [Homepage](https://broadinstitute.github.io/picard/) + version 1.1 task mark_duplicates { @@ -83,10 +84,13 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb) + Int disk_size_gb = ( + ( + if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb + ) Int java_heap_size = ceil(memory_gb * 0.9) @@ -96,17 +100,13 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam - then prefix + ".bam" - else "/dev/null" - }" \ + -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{if (optical_distance > 0) - then read_name_regex - else "null" + --READ_NAME_REGEX '~{ + if (optical_distance > 0) then read_name_regex else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,15 +194,15 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String mode_arg = if (summary_mode) - then "--MODE SUMMARY" - else "" - String stringency_arg = (if (index_validation_stringency_less_exhaustive) + String outfile = if summary_mode then outfile_name else outfile_name + ".gz" + String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" + String stringency_arg = ( + if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" ) Float bam_size = size(bam, "GiB") - Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< @@ -218,7 +218,8 @@ task validate_bam { --VALIDATION_STRINGENCY "~{validation_stringency}" \ ~{sep(" ", prefix("--IGNORE ", squote(ignore_list)))} \ --MAX_OUTPUT ~{max_errors} \ - > "~{outfile_name}" \ 
+ ~{if !summary_mode then "| gzip" else ""} \ + > "~{outfile}" \ || rc=$? # rc = 0 = success @@ -236,16 +237,16 @@ task validate_bam { fi if ! ~{succeed_on_errors} \ - && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile_name}")" -gt 0 ] + && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile}")" -gt 0 ] then >&2 echo "Problems detected by Picard ValidateSamFile" - >&2 grep -E "$GREP_PATTERN" "~{outfile_name}" + >&2 grep -E "$GREP_PATTERN" "~{outfile}" exit $rc fi >>> output { - File validate_report = outfile_name + File validate_report = outfile } runtime { @@ -419,10 +420,8 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime { - cpu: if threading - then 2 - else 1 + runtime{ + cpu: if threading then 2 else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -507,7 +506,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - }, + } } } @@ -854,7 +853,8 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{(if paired + ~{( + if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" )} \ @@ -862,10 +862,7 @@ task bam_to_fastq { VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired - then "'" + prefix + ".R2.fastq'" - else "" - } + ~{if paired then "'" + prefix + ".R2.fastq'" else ""} >>> output { @@ -873,7 +870,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime { + runtime{ memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -937,7 +934,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -1004,7 +1001,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." } } From 74e0dbf20f7ff4c80b89f3cd9ac9d16dcfa877d7 Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 16 Feb 2026 16:39:38 -0500 Subject: [PATCH 38/47] feat: methylation filtering (#283) Add filtering of sex chromosomes to the UMAP generation. Also generate a list of probes that have SNPs. Before submitting this PR, please make sure: - [x] You have added a few sentences describing the PR here. - [x] The code passes all CI tests without any errors or warnings. - [ ] You have added tests (when appropriate). - [x] You have added an entry in any relevant CHANGELOGs (when appropriate). - [x] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly! - [ ] You have updated the README or other documentation to account for these changes (when appropriate). 
--------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Ari Frantz Co-authored-by: Jobin Sunny <38107318+jsunny23@users.noreply.github.com> --- .github/workflows/docker-build.yaml | 41 ------ data_structures/flag_filter.wdl | 2 +- data_structures/read_group.wdl | 6 +- developer_scripts/run_sprocket_or_miniwdl.sh | 3 +- docker/minfi/Dockerfile | 3 +- docker/minfi/package.json | 2 +- docker/pandas/package.json | 2 +- docker/python-plotting/package.json | 2 +- docker/util/package.json | 2 +- scripts/CHANGELOG.md | 11 ++ scripts/methylation/filter.py | 18 ++- scripts/methylation/list-sex-probes.R | 15 ++ scripts/methylation/methylation-preprocess.R | 82 ++++++++--- tools/htseq.wdl | 2 +- tools/md5sum.wdl | 2 +- tools/util.wdl | 14 +- workflows/dnaseq/dnaseq-standard.wdl | 2 +- workflows/methylation/CHANGELOG.md | 11 +- workflows/methylation/methylation-cohort.wdl | 21 ++- .../methylation/methylation-preprocess.wdl | 41 +++++- .../methylation/methylation-standard.wdl | 136 ++++++++++++++++++ workflows/qc/quality-check-standard.wdl | 2 +- workflows/rnaseq/rnaseq-standard.wdl | 2 +- 23 files changed, 331 insertions(+), 91 deletions(-) create mode 100644 scripts/methylation/list-sex-probes.R diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 641134af4..d88d9170b 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -70,47 +70,6 @@ jobs: tags: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:local cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:buildcache cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:buildcache,mode=max - - name: container diff - run: | - docker pull ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }} || true - if [ -z "$(docker images -q ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }})" ]; then - echo "Prior image not found in registry, skipping container-diff" - exit 0 - fi - curl -LO https://storage.googleapis.com/container-diff/latest/container-diff-linux-amd64 && \ - chmod +x container-diff-linux-amd64 && \ - ./container-diff-linux-amd64 diff --json daemon://${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }} daemon://${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:local --type=history --type=file --type=size | tee comparison.json - # check overall size result - changed=0 - size_diff=$(jq '.[] | select(.DiffType == "Size") | if .Diff[0].Size1 == .Diff[0].Size2 then 1 else 0 end' comparison.json) - if [ $size_diff -eq 0 ] - then - echo "Size of the image has changed" - changed=1 - fi - # check file diff result - additions=$(jq '.[] | select(.DiffType == "File") | if .Diff.Adds != null then .Diff.Adds[].Name else 0 end' comparison.json) - deletions=$(jq '.[] | select(.DiffType == "File") | if .Diff.Dels != null then .Diff.Dels[].Name else 0 end' comparison.json) - modifications=$(jq '.[] | select(.DiffType == "File") | if .Diff.Mods != null then .Diff.Mods[].Name else 0 end' comparison.json) - if [ "$additions" != "0" ] - then - echo "Files have been added" - echo $additions - changed=1 - fi - if [ "$deletions" != "0" ] - then - echo "Files have been deleted" - echo $deletions - changed=1 - fi - if [ "$modifications" != "0" ] - then - echo "Files have been modified" - echo $modifications - changed=1 - fi - 
exit $changed - name: Run Snyk to check Docker image for vulnerabilities # Snyk can be used to break the build when it detects vulnerabilities. # In this case we want to upload the issues to GitHub Code Scanning diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index e57326359..7a27ea213 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -106,7 +106,7 @@ task validate_string_is_12bit_int { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index b06ba639b..460ddea33 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -138,7 +138,7 @@ task get_read_groups { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } @@ -363,7 +363,7 @@ task validate_read_group { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } @@ -421,7 +421,7 @@ task inner_read_group_to_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } diff --git a/developer_scripts/run_sprocket_or_miniwdl.sh b/developer_scripts/run_sprocket_or_miniwdl.sh index 57869e6fa..4bccf4104 100755 --- a/developer_scripts/run_sprocket_or_miniwdl.sh +++ b/developer_scripts/run_sprocket_or_miniwdl.sh @@ -66,7 +66,8 @@ if [ "$runner" != "miniwdl" ]; then else entrypoint=$wf fi - sprocket run --output output --overwrite -e "$entrypoint" "$wdl" ${input_file:+"$input_file"} "$@" + sprocket run --output-dir output --target "$entrypoint" "$wdl" ${input_file:+"$input_file"} "$@" + cp output/runs/*/_latest/outputs.json output/outputs.json else if [[ $input_file ]]; then input_dir=$(dirname "$input_file") diff --git a/docker/minfi/Dockerfile b/docker/minfi/Dockerfile index 8c979be56..a338a8859 100644 --- a/docker/minfi/Dockerfile +++ b/docker/minfi/Dockerfile @@ -8,4 +8,5 @@ RUN R --no-save <