From 328566412a2d24983e57b2760270a5282684d687 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 3 Sep 2025 17:01:40 -0400 Subject: [PATCH 01/47] init demo --- sprocket-tests/custom/quickcheck.sh | 10 ++++ .../data_structures/flag_filter.toml | 49 +++++++++++++++++++ sprocket-tests/tools/picard.toml | 11 +++++ sprocket-tests/tools/samtools.toml | 31 ++++++++++++ 4 files changed, 101 insertions(+) create mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 sprocket-tests/data_structures/flag_filter.toml create mode 100644 sprocket-tests/tools/picard.toml create mode 100644 sprocket-tests/tools/samtools.toml diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh new file mode 100644 index 000000000..8c270cc8e --- /dev/null +++ b/sprocket-tests/custom/quickcheck.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +out_json=$1 + +out_bam=$(jq -r .bam "$out_json") + +samtools quickcheck "$out_bam" + diff --git a/sprocket-tests/data_structures/flag_filter.toml b/sprocket-tests/data_structures/flag_filter.toml new file mode 100644 index 000000000..549004e1f --- /dev/null +++ b/sprocket-tests/data_structures/flag_filter.toml @@ -0,0 +1,49 @@ +[[validate_string_is_12bit_int]] +name = "decimal_passes" # each test must have a unique identifier +[validate_string_is_12bit_int.inputs] +number = "5" +# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" + +[[validate_string_is_12bit_int]] +name = "hexadecimal_passes" +[validate_string_is_12bit_int.inputs] +number = "0x900" +[validate_string_is_12bit_int.tests] +stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs + +[[validate_string_is_12bit_int]] +name = "too_big_hexadecimal_fails" +[validate_string_is_12bit_int.inputs] +number = "0x1000" +[validate_string_is_12bit_int.tests] +exit_code = 42 # the task should fail for this test +stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test + +[[validate_string_is_12bit_int]] +name = "too_big_decimal_fails" +[validate_string_is_12bit_int.inputs] +number = "4096" +[validate_string_is_12bit_int.tests] +exit_code = 42 +stderr.contains = [ + "Input number (4096) interpreted as decimal", + "But number must be less than 4096!", +] # `contains` test can also be an array of strings + +[[validate_flag_filter]] # a workflow test +name = "valid_FlagFilter_passes" +[validate_flag_filter.inputs.flags] +include_if_all = "3" # decimal +exclude_if_any = "0xF04" # hexadecimal +include_if_any = "03" # octal +exclude_if_all = "4095" # decimal + +[[validate_flag_filter]] +name = "invalid_FlagFilter_fails" +[validate_flag_filter.inputs.flags] +include_if_all = "" # empty string +exclude_if_any = "this is not a number" +include_if_any = "000000000011" # binary interpreted as octal. 
Too many digits for octal +exclude_if_all = "4095" # this is fine +[validate_flag_filter.tests] +should_fail = true diff --git a/sprocket-tests/tools/picard.toml b/sprocket-tests/tools/picard.toml new file mode 100644 index 000000000..b1723646b --- /dev/null +++ b/sprocket-tests/tools/picard.toml @@ -0,0 +1,11 @@ +[[merge_sam_files]] +name = "Merge works" +[merge_sam_files.inputs] +bams = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", +] +prefix = "test.merged" +[merge_sam_files.tests] +custom = "quickcheck.sh" + diff --git a/sprocket-tests/tools/samtools.toml b/sprocket-tests/tools/samtools.toml new file mode 100644 index 000000000..070d9c0bc --- /dev/null +++ b/sprocket-tests/tools/samtools.toml @@ -0,0 +1,31 @@ +[[bam_to_fastq]] +name = "kitchen_sink" +[[bam_to_fastq.matrix]] +bam = [ + "$FIXTURES/test1.bam", + "$FIXTURES/test2.bam", + "$FIXTURES/test3.bam", +] +bam_index = [ + "$FIXTURES/test1.bam.bai", + "$FIXTURES/test2.bam.bai", + "$FIXTURES/test3.bam.bai", +] +[[bam_to_fastq.matrix]] +bitwise_filter = [ + { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, + { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, +] +[[bam_to_fastq.matrix]] +paired_end = [true, false] +[[bam_to_fastq.matrix]] +retain_collated_bam = [true, false] +[[bam_to_fastq.matrix]] +append_read_number = [true, false] +[[bam_to_fastq.matrix]] +output_singletons = [true, false] +[bam_to_fastq.inputs] +prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix +# this test is to ensure all the options (and combinations thereof) are valid +# so no tests beyond a `0` exit code are needed here + From 7bc1e3a9c85e19c47dfb1f87c78930fb9b8b367e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Sep 2025 08:16:55 -0400 Subject: [PATCH 02/47] revise: mv TOML out of tests dir and into main workspace --- .../data_structures => data_structures}/flag_filter.toml | 0 {sprocket-tests/tools => tools}/picard.toml | 0 {sprocket-tests/tools => tools}/samtools.toml | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {sprocket-tests/data_structures => data_structures}/flag_filter.toml (100%) rename {sprocket-tests/tools => tools}/picard.toml (100%) rename {sprocket-tests/tools => tools}/samtools.toml (100%) diff --git a/sprocket-tests/data_structures/flag_filter.toml b/data_structures/flag_filter.toml similarity index 100% rename from sprocket-tests/data_structures/flag_filter.toml rename to data_structures/flag_filter.toml diff --git a/sprocket-tests/tools/picard.toml b/tools/picard.toml similarity index 100% rename from sprocket-tests/tools/picard.toml rename to tools/picard.toml diff --git a/sprocket-tests/tools/samtools.toml b/tools/samtools.toml similarity index 100% rename from sprocket-tests/tools/samtools.toml rename to tools/samtools.toml From 6438780b5ab75c551365755080ea1567cdd4ef86 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 18 Nov 2025 09:18:08 -0500 Subject: [PATCH 03/47] switch from TOML to YAML --- data_structures/flag_filter.toml | 49 -------------------------------- data_structures/flag_filter.yaml | 43 ++++++++++++++++++++++++++++ tools/picard.toml | 11 ------- tools/picard.yaml | 9 ++++++ tools/samtools.toml | 31 -------------------- tools/samtools.yaml | 34 ++++++++++++++++++++++ 6 files changed, 86 insertions(+), 91 deletions(-) delete mode 100644 data_structures/flag_filter.toml create mode 100644 data_structures/flag_filter.yaml delete mode 100644 
tools/picard.toml create mode 100644 tools/picard.yaml delete mode 100644 tools/samtools.toml create mode 100644 tools/samtools.yaml diff --git a/data_structures/flag_filter.toml b/data_structures/flag_filter.toml deleted file mode 100644 index 549004e1f..000000000 --- a/data_structures/flag_filter.toml +++ /dev/null @@ -1,49 +0,0 @@ -[[validate_string_is_12bit_int]] -name = "decimal_passes" # each test must have a unique identifier -[validate_string_is_12bit_int.inputs] -number = "5" -# without any tests explicitly configured, Sprocket will consider the task executing with a 0 exit code to be a "pass" and any non-zero exit code as a "fail" - -[[validate_string_is_12bit_int]] -name = "hexadecimal_passes" -[validate_string_is_12bit_int.inputs] -number = "0x900" -[validate_string_is_12bit_int.tests] -stdout.contains = "Input number (0x900) is valid" # builtin test for checking STDOUT logs - -[[validate_string_is_12bit_int]] -name = "too_big_hexadecimal_fails" -[validate_string_is_12bit_int.inputs] -number = "0x1000" -[validate_string_is_12bit_int.tests] -exit_code = 42 # the task should fail for this test -stderr.contains = "Input number (0x1000) is invalid" # similar to the stdout test - -[[validate_string_is_12bit_int]] -name = "too_big_decimal_fails" -[validate_string_is_12bit_int.inputs] -number = "4096" -[validate_string_is_12bit_int.tests] -exit_code = 42 -stderr.contains = [ - "Input number (4096) interpreted as decimal", - "But number must be less than 4096!", -] # `contains` test can also be an array of strings - -[[validate_flag_filter]] # a workflow test -name = "valid_FlagFilter_passes" -[validate_flag_filter.inputs.flags] -include_if_all = "3" # decimal -exclude_if_any = "0xF04" # hexadecimal -include_if_any = "03" # octal -exclude_if_all = "4095" # decimal - -[[validate_flag_filter]] -name = "invalid_FlagFilter_fails" -[validate_flag_filter.inputs.flags] -include_if_all = "" # empty string -exclude_if_any = "this is not a number" -include_if_any = "000000000011" # binary interpreted as octal. Too many digits for octal -exclude_if_all = "4095" # this is fine -[validate_flag_filter.tests] -should_fail = true diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml new file mode 100644 index 000000000..9888ec27b --- /dev/null +++ b/data_structures/flag_filter.yaml @@ -0,0 +1,43 @@ +validate_string_is_12bit_int: + - name: decimal_passes + inputs: + - number: "5" + - name: hexadecimal_passes + inputs: + - number: "0x900" + tests: + stdout: + contains: Input number (0x900) is valid + - name: too_big_hexadecimal_fails + inputs: + - number: "0x1000" + tests: + exit_code: 42 + stderr: + contains: Input number (0x1000) is invalid + - name: too_big_decimal_fails + inputs: + - number: "4096" + tests: + exit_code: 42 + stderr: + contains: + - Input number (4096) interpreted as decimal + - But number must be less than 4096! 
+validate_flag_filter: + - name: valid_FlagFilter_passes + inputs: + - flags: + include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "4095" + - name: invalid_FlagFilter_fails + inputs: + - flags: + include_if_all: "" + exclude_if_any: this is not a number + include_if_any: "000000000011" + exclude_if_all: "4095" + tests: + should_fail: true diff --git a/tools/picard.toml b/tools/picard.toml deleted file mode 100644 index b1723646b..000000000 --- a/tools/picard.toml +++ /dev/null @@ -1,11 +0,0 @@ -[[merge_sam_files]] -name = "Merge works" -[merge_sam_files.inputs] -bams = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", -] -prefix = "test.merged" -[merge_sam_files.tests] -custom = "quickcheck.sh" - diff --git a/tools/picard.yaml b/tools/picard.yaml new file mode 100644 index 000000000..075d6c794 --- /dev/null +++ b/tools/picard.yaml @@ -0,0 +1,9 @@ +merge_sam_files: + - name: Merge works + inputs: + - bams: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - prefix: test.merged + tests: + custom: quickcheck.sh diff --git a/tools/samtools.toml b/tools/samtools.toml deleted file mode 100644 index 070d9c0bc..000000000 --- a/tools/samtools.toml +++ /dev/null @@ -1,31 +0,0 @@ -[[bam_to_fastq]] -name = "kitchen_sink" -[[bam_to_fastq.matrix]] -bam = [ - "$FIXTURES/test1.bam", - "$FIXTURES/test2.bam", - "$FIXTURES/test3.bam", -] -bam_index = [ - "$FIXTURES/test1.bam.bai", - "$FIXTURES/test2.bam.bai", - "$FIXTURES/test3.bam.bai", -] -[[bam_to_fastq.matrix]] -bitwise_filter = [ - { include_if_all = "0x0", exclude_if_any = "0x900", include_if_any = "0x0", exclude_if_all = "0x0" }, - { include_if_all = "00", exclude_if_any = "0x904", include_if_any = "3", exclude_if_all = "0" }, -] -[[bam_to_fastq.matrix]] -paired_end = [true, false] -[[bam_to_fastq.matrix]] -retain_collated_bam = [true, false] -[[bam_to_fastq.matrix]] -append_read_number = [true, false] -[[bam_to_fastq.matrix]] -output_singletons = [true, false] -[bam_to_fastq.inputs] -prefix = "kitchen_sink_test" # the `prefix` input will be shared by _all_ permutations of the test matrix -# this test is to ensure all the options (and combinations thereof) are valid -# so no tests beyond a `0` exit code are needed here - diff --git a/tools/samtools.yaml b/tools/samtools.yaml new file mode 100644 index 000000000..0e09fe9f4 --- /dev/null +++ b/tools/samtools.yaml @@ -0,0 +1,34 @@ +bam_to_fastq: + - name: kitchen_sink + inputs: + - bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + - bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + - paired_end: + - true + - false + - retain_collated_bam: + - true + - false + - append_read_number: + - true + - false + - output_singletons: + - true + - false + - prefix: + - kitchen_sink_test From 62744f295be4186311f62b44606ec2ab2adc34fc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:54:00 -0500 Subject: [PATCH 04/47] WIP --- data_structures/flag_filter.yaml | 16 ++++++++++------ tools/picard.yaml | 6 +++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 9888ec27b..793edb8f4 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,23 +1,27 @@ 
validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: "5" + - number: + - "5" - name: hexadecimal_passes inputs: - - number: "0x900" + - number: + - "0x900" tests: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: "0x1000" + - number: + - "0x1000" tests: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: "4096" + - number: + - "4096" tests: exit_code: 42 stderr: @@ -28,14 +32,14 @@ validate_flag_filter: - name: valid_FlagFilter_passes inputs: - flags: - include_if_all: "3" + - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - flags: - include_if_all: "" + - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" diff --git a/tools/picard.yaml b/tools/picard.yaml index 075d6c794..502cd7ea1 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,8 +2,8 @@ merge_sam_files: - name: Merge works inputs: - bams: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - prefix: test.merged + - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - prefix: + - test.merged tests: custom: quickcheck.sh From a9cc715c65c4b8242f4e679e54bcc2cb10dc57e8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 14:57:20 -0500 Subject: [PATCH 05/47] fix: tests -> assertions --- data_structures/flag_filter.yaml | 6 +++--- tools/picard.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index 793edb8f4..d503db985 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -14,7 +14,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x1000" - tests: + assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid @@ -22,7 +22,7 @@ validate_string_is_12bit_int: inputs: - number: - "4096" - tests: + assertions: exit_code: 42 stderr: contains: @@ -43,5 +43,5 @@ validate_flag_filter: exclude_if_any: this is not a number include_if_any: "000000000011" exclude_if_all: "4095" - tests: + assertions: should_fail: true diff --git a/tools/picard.yaml b/tools/picard.yaml index 502cd7ea1..a6a7be331 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -5,5 +5,5 @@ merge_sam_files: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - prefix: - test.merged - tests: + assertions: custom: quickcheck.sh From e09621cfa16f756c3f1dae9f9adf57b82588a726 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 19 Nov 2025 16:09:23 -0500 Subject: [PATCH 06/47] Update flag_filter.yaml --- data_structures/flag_filter.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index d503db985..a9abf5b16 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -7,7 +7,7 @@ validate_string_is_12bit_int: inputs: - number: - "0x900" - tests: + assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails From 0be911adff50fd00e911ae69b47b701c5551e49e Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 29 Nov 2025 19:58:16 -0500 Subject: [PATCH 07/47] feat: use Peter's top level mapping representation --- data_structures/flag_filter.yaml | 20 +++---- data_structures/read_group.yml | 13 +++++ tools/picard.yaml | 4 +- tools/samtools.yaml | 89 ++++++++++++++++++++++---------- 4 files changed, 88 insertions(+), 38 
deletions(-) create mode 100644 data_structures/read_group.yml diff --git a/data_structures/flag_filter.yaml b/data_structures/flag_filter.yaml index a9abf5b16..1a667700a 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/flag_filter.yaml @@ -1,27 +1,27 @@ validate_string_is_12bit_int: - name: decimal_passes inputs: - - number: - - "5" + number: + - "5" - name: hexadecimal_passes inputs: - - number: - - "0x900" + number: + - "0x900" assertions: stdout: contains: Input number (0x900) is valid - name: too_big_hexadecimal_fails inputs: - - number: - - "0x1000" + number: + - "0x1000" assertions: exit_code: 42 stderr: contains: Input number (0x1000) is invalid - name: too_big_decimal_fails inputs: - - number: - - "4096" + number: + - "4096" assertions: exit_code: 42 stderr: @@ -31,14 +31,14 @@ validate_string_is_12bit_int: validate_flag_filter: - name: valid_FlagFilter_passes inputs: - - flags: + flags: - include_if_all: "3" exclude_if_any: "0xF04" include_if_any: "03" exclude_if_all: "4095" - name: invalid_FlagFilter_fails inputs: - - flags: + flags: - include_if_all: "" exclude_if_any: this is not a number include_if_any: "000000000011" diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml new file mode 100644 index 000000000..c92e5c2b0 --- /dev/null +++ b/data_structures/read_group.yml @@ -0,0 +1,13 @@ +# Note this file has the extension `.yml` while other tests end with `.yaml`. +# This is an intentional test that both extensions work. +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + contains: ID (id) must not match pattern diff --git a/tools/picard.yaml b/tools/picard.yaml index a6a7be331..f2b547bb4 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -1,9 +1,9 @@ merge_sam_files: - name: Merge works inputs: - - bams: + bams: - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] - - prefix: + prefix: - test.merged assertions: custom: quickcheck.sh diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 0e09fe9f4..8f6e5276f 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,34 +1,71 @@ bam_to_fastq: - name: kitchen_sink inputs: - - bam: + $files: + bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + bitwise_filter: + - include_if_all: "0x0" + exclude_if_any: "0x900" + include_if_any: "0x0" + exclude_if_all: "0x0" + - include_if_all: "00" + exclude_if_any: "0x904" + include_if_any: "3" + exclude_if_all: "0" + paired_end: + - true + - false + retain_collated_bam: + - true + - false + append_read_number: + - true + - false + output_singletons: + - true + - false + prefix: + - kitchen_sink_test + - name: simpler + inputs: + output_singletons: + - true + - false + $files: + bam: - $FIXTURES/test1.bam - $FIXTURES/test2.bam - - $FIXTURES/test3.bam bam_index: - $FIXTURES/test1.bam.bai - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - - bitwise_filter: - - include_if_all: "0x0" - exclude_if_any: "0x900" - include_if_any: "0x0" - exclude_if_all: "0x0" - - include_if_all: "00" - exclude_if_any: "0x904" - include_if_any: "3" - exclude_if_all: "0" - - paired_end: - - true - - false - - retain_collated_bam: - - true - - false - - append_read_number: - - true - - false - - output_singletons: - - true - - false - - prefix: - - kitchen_sink_test + - name: not as simple + inputs: + output_singletons: + - true + - false + $files: + 
bam: + - $FIXTURES/test1.bam + - $FIXTURES/test2.bam + - $FIXTURES/test3.bam + bam_index: + - $FIXTURES/test1.bam.bai + - $FIXTURES/test2.bam.bai + - $FIXTURES/test3.bam.bai + $ref: + ref_fasta: + - hg19.fasta + - GRCh38.fasta + ref_fasta_index: + - hg19.fa.fai + - GRCh38.fa.fai + prefix: + - not_as_simple + From 374392a61921c0c688c087da0652b6f46cf3bfb8 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 9 Dec 2025 13:56:33 -0500 Subject: [PATCH 08/47] set up sprocket test infra --- sprocket-tests/custom/quickcheck.sh | 10 ---------- test/bin/quickcheck.sh | 0 .../fixtures}/1scattered.interval_list | 0 ...001_R01C01.beta_swan_norm_unfiltered.genomic.csv | 0 ...001_R03C01.beta_swan_norm_unfiltered.genomic.csv | 0 .../fixtures}/201533520001_R03C01_Grn.idat | Bin .../fixtures}/201533520001_R03C01_Red.idat | Bin .../fixtures}/Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../input => test/fixtures}/GRCh38.chr1_chr19.dict | 0 {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa | 0 .../fixtures}/GRCh38.chr1_chr19.fa.fai | 0 .../input => test/fixtures}/GRCh38.chr9_chr22.fa.gz | 0 .../fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.dict | 0 {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa | 0 .../input => test/fixtures}/GRCh38.chrY_chrM.fa.fai | 0 .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin {tests/input => test/fixtures}/README.md | 0 .../fixtures}/chr1_chr19.interval_list | 0 {tests/input => test/fixtures}/combined_beta.csv | 0 {tests/input => test/fixtures}/filtered_beta.csv | 0 {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv | 0 .../fixtures}/gencode.v31.chr9_chr22.gtf.gz | 0 .../fixtures}/gencode.v31.chrY_chrM.gene.bed | 0 .../fixtures}/gencode.v31.chrY_chrM.genelengths.txt | 0 .../fixtures}/gencode.v31.chrY_chrM.gtf.gz | 0 .../fixtures}/kraken2_C_elegans_library.tar.gz | 0 .../input => test/fixtures}/kraken2_db.mini.tar.gz | 0 .../input => test/fixtures}/kraken2_taxonomy.tar.gz | 0 {tests/input => test/fixtures}/random10k.r1.fq.gz | 0 {tests/input => test/fixtures}/random10k.r2.fq.gz | 0 .../fixtures}/star_db.chrY_chrM.tar.gz | 0 .../fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam | 0 {tests/input => test/fixtures}/test.bam | 0 {tests/input => test/fixtures}/test.bam.bai | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai | 0 .../test.bwa_aln_pe.chrY_chrM.feature-counts.txt | 0 .../test.bwa_aln_pe.chrY_chrM.readlength.txt | 0 {tests/input => test/fixtures}/test.extra_RG.bam | 0 {tests/input => test/fixtures}/test.fa | 0 {tests/input => test/fixtures}/test.tar.gz | 0 .../fixtures}/test.unaccounted_read.bam | 0 {tests/input => test/fixtures}/test1.vcf.gz | 0 {tests/input => test/fixtures}/test1.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test2.bam | 0 {tests/input => test/fixtures}/test2.vcf.gz | 0 {tests/input => test/fixtures}/test2.vcf.gz.tbi | Bin {tests/input => test/fixtures}/test_R1.fq.gz | 0 {tests/input => test/fixtures}/test_R2.fq.gz | 0 .../input => test/fixtures}/test_rnaseq_variant.bam | 0 .../fixtures}/test_rnaseq_variant.bam.bai | 0 .../fixtures}/test_rnaseq_variant.recal.txt | 0 {tests/input => test/fixtures}/umap.csv | 0 .../wgs_calling_regions.hg38.interval_list | 0 60 files changed, 10 deletions(-) delete 
mode 100644 sprocket-tests/custom/quickcheck.sh create mode 100644 test/bin/quickcheck.sh rename {tests/input => test/fixtures}/1scattered.interval_list (100%) rename {tests/input => test/fixtures}/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Grn.idat (100%) rename {tests/input => test/fixtures}/201533520001_R03C01_Red.idat (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename {tests/input => test/fixtures}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chr1_chr19.fa.fai (100%) rename {tests/input => test/fixtures}/GRCh38.chr9_chr22.fa.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.dict (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa (100%) rename {tests/input => test/fixtures}/GRCh38.chrY_chrM.fa.fai (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename {tests/input => test/fixtures}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename {tests/input => test/fixtures}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/README.md (100%) rename {tests/input => test/fixtures}/chr1_chr19.interval_list (100%) rename {tests/input => test/fixtures}/combined_beta.csv (100%) rename {tests/input => test/fixtures}/filtered_beta.csv (100%) rename {tests/input => test/fixtures}/fusions.BCR_ABL1.tsv (100%) rename {tests/input => test/fixtures}/gencode.v31.chr9_chr22.gtf.gz (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gene.bed (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename {tests/input => test/fixtures}/gencode.v31.chrY_chrM.gtf.gz (100%) rename {tests/input => test/fixtures}/kraken2_C_elegans_library.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_db.mini.tar.gz (100%) rename {tests/input => test/fixtures}/kraken2_taxonomy.tar.gz (100%) rename {tests/input => test/fixtures}/random10k.r1.fq.gz (100%) rename {tests/input => test/fixtures}/random10k.r2.fq.gz (100%) rename {tests/input => test/fixtures}/star_db.chrY_chrM.tar.gz (100%) rename {tests/input => test/fixtures}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename {tests/input => test/fixtures}/test.bam (100%) rename {tests/input => test/fixtures}/test.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.TPM.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.feature-counts.txt (100%) rename {tests/input => test/fixtures}/test.bwa_aln_pe.chrY_chrM.readlength.txt (100%) rename {tests/input => test/fixtures}/test.extra_RG.bam (100%) rename {tests/input => test/fixtures}/test.fa (100%) rename {tests/input => test/fixtures}/test.tar.gz (100%) rename {tests/input => test/fixtures}/test.unaccounted_read.bam (100%) rename {tests/input => test/fixtures}/test1.vcf.gz (100%) 
rename {tests/input => test/fixtures}/test1.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test2.bam (100%) rename {tests/input => test/fixtures}/test2.vcf.gz (100%) rename {tests/input => test/fixtures}/test2.vcf.gz.tbi (100%) rename {tests/input => test/fixtures}/test_R1.fq.gz (100%) rename {tests/input => test/fixtures}/test_R2.fq.gz (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.bam.bai (100%) rename {tests/input => test/fixtures}/test_rnaseq_variant.recal.txt (100%) rename {tests/input => test/fixtures}/umap.csv (100%) rename {tests/input => test/fixtures}/wgs_calling_regions.hg38.interval_list (100%) diff --git a/sprocket-tests/custom/quickcheck.sh b/sprocket-tests/custom/quickcheck.sh deleted file mode 100644 index 8c270cc8e..000000000 --- a/sprocket-tests/custom/quickcheck.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -out_json=$1 - -out_bam=$(jq -r .bam "$out_json") - -samtools quickcheck "$out_bam" - diff --git a/test/bin/quickcheck.sh b/test/bin/quickcheck.sh new file mode 100644 index 000000000..e69de29bb diff --git a/tests/input/1scattered.interval_list b/test/fixtures/1scattered.interval_list similarity index 100% rename from tests/input/1scattered.interval_list rename to test/fixtures/1scattered.interval_list diff --git a/tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv b/test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv similarity index 100% rename from tests/input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv rename to test/fixtures/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv diff --git a/tests/input/201533520001_R03C01_Grn.idat b/test/fixtures/201533520001_R03C01_Grn.idat similarity index 100% rename from tests/input/201533520001_R03C01_Grn.idat rename to test/fixtures/201533520001_R03C01_Grn.idat diff --git a/tests/input/201533520001_R03C01_Red.idat b/test/fixtures/201533520001_R03C01_Red.idat similarity index 100% rename from tests/input/201533520001_R03C01_Red.idat rename to test/fixtures/201533520001_R03C01_Red.idat diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/tests/input/GRCh38.chr1_chr19.dict b/test/fixtures/GRCh38.chr1_chr19.dict similarity index 100% rename from tests/input/GRCh38.chr1_chr19.dict rename to test/fixtures/GRCh38.chr1_chr19.dict diff --git a/tests/input/GRCh38.chr1_chr19.fa b/test/fixtures/GRCh38.chr1_chr19.fa similarity index 100% rename from tests/input/GRCh38.chr1_chr19.fa rename to test/fixtures/GRCh38.chr1_chr19.fa diff --git a/tests/input/GRCh38.chr1_chr19.fa.fai b/test/fixtures/GRCh38.chr1_chr19.fa.fai similarity index 100% 
rename from tests/input/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/GRCh38.chr1_chr19.fa.fai diff --git a/tests/input/GRCh38.chr9_chr22.fa.gz b/test/fixtures/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from tests/input/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/GRCh38.chr9_chr22.fa.gz diff --git a/tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from tests/input/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/tests/input/GRCh38.chrY_chrM.dict b/test/fixtures/GRCh38.chrY_chrM.dict similarity index 100% rename from tests/input/GRCh38.chrY_chrM.dict rename to test/fixtures/GRCh38.chrY_chrM.dict diff --git a/tests/input/GRCh38.chrY_chrM.fa b/test/fixtures/GRCh38.chrY_chrM.fa similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa rename to test/fixtures/GRCh38.chrY_chrM.fa diff --git a/tests/input/GRCh38.chrY_chrM.fa.fai b/test/fixtures/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from tests/input/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/GRCh38.chrY_chrM.fa.fai diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from tests/input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from tests/input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/tests/input/README.md b/test/fixtures/README.md similarity index 100% rename from tests/input/README.md rename to test/fixtures/README.md diff --git a/tests/input/chr1_chr19.interval_list b/test/fixtures/chr1_chr19.interval_list similarity index 100% rename from tests/input/chr1_chr19.interval_list rename to test/fixtures/chr1_chr19.interval_list diff --git a/tests/input/combined_beta.csv b/test/fixtures/combined_beta.csv similarity index 100% rename from tests/input/combined_beta.csv rename to test/fixtures/combined_beta.csv diff --git a/tests/input/filtered_beta.csv b/test/fixtures/filtered_beta.csv similarity index 100% rename from tests/input/filtered_beta.csv rename to test/fixtures/filtered_beta.csv diff --git a/tests/input/fusions.BCR_ABL1.tsv b/test/fixtures/fusions.BCR_ABL1.tsv similarity index 100% rename from tests/input/fusions.BCR_ABL1.tsv rename to test/fixtures/fusions.BCR_ABL1.tsv diff --git a/tests/input/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/gencode.v31.chr9_chr22.gtf.gz diff --git 
a/tests/input/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/gencode.v31.chrY_chrM.gene.bed diff --git a/tests/input/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/gencode.v31.chrY_chrM.genelengths.txt diff --git a/tests/input/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from tests/input/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/gencode.v31.chrY_chrM.gtf.gz diff --git a/tests/input/kraken2_C_elegans_library.tar.gz b/test/fixtures/kraken2_C_elegans_library.tar.gz similarity index 100% rename from tests/input/kraken2_C_elegans_library.tar.gz rename to test/fixtures/kraken2_C_elegans_library.tar.gz diff --git a/tests/input/kraken2_db.mini.tar.gz b/test/fixtures/kraken2_db.mini.tar.gz similarity index 100% rename from tests/input/kraken2_db.mini.tar.gz rename to test/fixtures/kraken2_db.mini.tar.gz diff --git a/tests/input/kraken2_taxonomy.tar.gz b/test/fixtures/kraken2_taxonomy.tar.gz similarity index 100% rename from tests/input/kraken2_taxonomy.tar.gz rename to test/fixtures/kraken2_taxonomy.tar.gz diff --git a/tests/input/random10k.r1.fq.gz b/test/fixtures/random10k.r1.fq.gz similarity index 100% rename from tests/input/random10k.r1.fq.gz rename to test/fixtures/random10k.r1.fq.gz diff --git a/tests/input/random10k.r2.fq.gz b/test/fixtures/random10k.r2.fq.gz similarity index 100% rename from tests/input/random10k.r2.fq.gz rename to test/fixtures/random10k.r2.fq.gz diff --git a/tests/input/star_db.chrY_chrM.tar.gz b/test/fixtures/star_db.chrY_chrM.tar.gz similarity index 100% rename from tests/input/star_db.chrY_chrM.tar.gz rename to test/fixtures/star_db.chrY_chrM.tar.gz diff --git a/tests/input/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from tests/input/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/tests/input/test.bam b/test/fixtures/test.bam similarity index 100% rename from tests/input/test.bam rename to test/fixtures/test.bam diff --git a/tests/input/test.bam.bai b/test/fixtures/test.bam.bai similarity index 100% rename from tests/input/test.bam.bai rename to test/fixtures/test.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.TPM.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.feature-counts.txt diff --git 
a/tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt b/test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt similarity index 100% rename from tests/input/test.bwa_aln_pe.chrY_chrM.readlength.txt rename to test/fixtures/test.bwa_aln_pe.chrY_chrM.readlength.txt diff --git a/tests/input/test.extra_RG.bam b/test/fixtures/test.extra_RG.bam similarity index 100% rename from tests/input/test.extra_RG.bam rename to test/fixtures/test.extra_RG.bam diff --git a/tests/input/test.fa b/test/fixtures/test.fa similarity index 100% rename from tests/input/test.fa rename to test/fixtures/test.fa diff --git a/tests/input/test.tar.gz b/test/fixtures/test.tar.gz similarity index 100% rename from tests/input/test.tar.gz rename to test/fixtures/test.tar.gz diff --git a/tests/input/test.unaccounted_read.bam b/test/fixtures/test.unaccounted_read.bam similarity index 100% rename from tests/input/test.unaccounted_read.bam rename to test/fixtures/test.unaccounted_read.bam diff --git a/tests/input/test1.vcf.gz b/test/fixtures/test1.vcf.gz similarity index 100% rename from tests/input/test1.vcf.gz rename to test/fixtures/test1.vcf.gz diff --git a/tests/input/test1.vcf.gz.tbi b/test/fixtures/test1.vcf.gz.tbi similarity index 100% rename from tests/input/test1.vcf.gz.tbi rename to test/fixtures/test1.vcf.gz.tbi diff --git a/tests/input/test2.bam b/test/fixtures/test2.bam similarity index 100% rename from tests/input/test2.bam rename to test/fixtures/test2.bam diff --git a/tests/input/test2.vcf.gz b/test/fixtures/test2.vcf.gz similarity index 100% rename from tests/input/test2.vcf.gz rename to test/fixtures/test2.vcf.gz diff --git a/tests/input/test2.vcf.gz.tbi b/test/fixtures/test2.vcf.gz.tbi similarity index 100% rename from tests/input/test2.vcf.gz.tbi rename to test/fixtures/test2.vcf.gz.tbi diff --git a/tests/input/test_R1.fq.gz b/test/fixtures/test_R1.fq.gz similarity index 100% rename from tests/input/test_R1.fq.gz rename to test/fixtures/test_R1.fq.gz diff --git a/tests/input/test_R2.fq.gz b/test/fixtures/test_R2.fq.gz similarity index 100% rename from tests/input/test_R2.fq.gz rename to test/fixtures/test_R2.fq.gz diff --git a/tests/input/test_rnaseq_variant.bam b/test/fixtures/test_rnaseq_variant.bam similarity index 100% rename from tests/input/test_rnaseq_variant.bam rename to test/fixtures/test_rnaseq_variant.bam diff --git a/tests/input/test_rnaseq_variant.bam.bai b/test/fixtures/test_rnaseq_variant.bam.bai similarity index 100% rename from tests/input/test_rnaseq_variant.bam.bai rename to test/fixtures/test_rnaseq_variant.bam.bai diff --git a/tests/input/test_rnaseq_variant.recal.txt b/test/fixtures/test_rnaseq_variant.recal.txt similarity index 100% rename from tests/input/test_rnaseq_variant.recal.txt rename to test/fixtures/test_rnaseq_variant.recal.txt diff --git a/tests/input/umap.csv b/test/fixtures/umap.csv similarity index 100% rename from tests/input/umap.csv rename to test/fixtures/umap.csv diff --git a/tests/input/wgs_calling_regions.hg38.interval_list b/test/fixtures/wgs_calling_regions.hg38.interval_list similarity index 100% rename from tests/input/wgs_calling_regions.hg38.interval_list rename to test/fixtures/wgs_calling_regions.hg38.interval_list From d89fd65ae5e2de848b77e0f2aebc6385c17e9284 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:05 -0500 Subject: [PATCH 09/47] picard: lower merge mem for testing --- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 07324ebe2..33927919b 100755 --- 
a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 40 + Int memory_gb = 10 Int modify_disk_size_gb = 0 } From 81d9608417005ebdf8fda57c69b9ba3f8af56cd3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 11 Dec 2025 09:51:19 -0500 Subject: [PATCH 10/47] point to new fixtures --- tools/picard.yaml | 2 +- tools/samtools.yaml | 50 +++++++++------------------------------------ 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/tools/picard.yaml b/tools/picard.yaml index f2b547bb4..7d031e5dd 100644 --- a/tools/picard.yaml +++ b/tools/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [$FIXTURES/test1.bam, $FIXTURES/test2.bam] + - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/samtools.yaml b/tools/samtools.yaml index 8f6e5276f..2f75fdb52 100644 --- a/tools/samtools.yaml +++ b/tools/samtools.yaml @@ -1,15 +1,11 @@ bam_to_fastq: - name: kitchen_sink inputs: - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -38,34 +34,8 @@ bam_to_fastq: output_singletons: - true - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - name: not as simple - inputs: - output_singletons: - - true - - false - $files: - bam: - - $FIXTURES/test1.bam - - $FIXTURES/test2.bam - - $FIXTURES/test3.bam - bam_index: - - $FIXTURES/test1.bam.bai - - $FIXTURES/test2.bam.bai - - $FIXTURES/test3.bam.bai - $ref: - ref_fasta: - - hg19.fasta - - GRCh38.fasta - ref_fasta_index: - - hg19.fa.fai - - GRCh38.fa.fai - prefix: - - not_as_simple - + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + - test.bam + - test_rnaseq_variant.bam + - test.bwa_aln_pe.chrY_chrM.bam From 5d97f28c6fb2cf7d8e1bd1b6fde83d5a5ac05d82 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 15 Dec 2025 07:36:47 -0500 Subject: [PATCH 11/47] Revert "picard: lower merge mem for testing" This reverts commit d89fd65ae5e2de848b77e0f2aebc6385c17e9284. 
--- tools/picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/picard.wdl b/tools/picard.wdl index 33927919b..07324ebe2 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -385,7 +385,7 @@ task merge_sam_files { String sort_order = "coordinate" String validation_stringency = "SILENT" Boolean threading = true - Int memory_gb = 10 + Int memory_gb = 40 Int modify_disk_size_gb = 0 } From 1b7235dd45737f977edabb1fc3aa1b66b3ebb9dd Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 27 Dec 2025 16:43:34 -0500 Subject: [PATCH 12/47] WIP --- data_structures/read_group.yml | 13 ------------- data_structures/{ => test}/flag_filter.yaml | 11 +++++------ data_structures/test/read_group.yaml | 11 +++++++++++ tools/{ => test}/picard.yaml | 0 tools/{ => test}/samtools.yaml | 2 -- 5 files changed, 16 insertions(+), 21 deletions(-) delete mode 100644 data_structures/read_group.yml rename data_structures/{ => test}/flag_filter.yaml (79%) create mode 100644 data_structures/test/read_group.yaml rename tools/{ => test}/picard.yaml (100%) rename tools/{ => test}/samtools.yaml (96%) diff --git a/data_structures/read_group.yml b/data_structures/read_group.yml deleted file mode 100644 index c92e5c2b0..000000000 --- a/data_structures/read_group.yml +++ /dev/null @@ -1,13 +0,0 @@ -# Note this file has the extension `.yml` while other tests end with `.yaml`. -# This is an intentional test that both extensions work. -read_group_to_string: - - name: bad_id - inputs: - read_group: - - ID: id, - SM: sample_a - LB: library - assertions: - exit_code: 1 - stdout: - contains: ID (id) must not match pattern diff --git a/data_structures/flag_filter.yaml b/data_structures/test/flag_filter.yaml similarity index 79% rename from data_structures/flag_filter.yaml rename to data_structures/test/flag_filter.yaml index 1a667700a..4da14bcdd 100644 --- a/data_structures/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -8,8 +8,8 @@ validate_string_is_12bit_int: number: - "0x900" assertions: - stdout: - contains: Input number (0x900) is valid + stderr: + - Input number \(.*\) is valid - name: too_big_hexadecimal_fails inputs: number: @@ -17,7 +17,7 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: Input number (0x1000) is invalid + - Input number \(.*\) is invalid - name: too_big_decimal_fails inputs: number: @@ -25,9 +25,8 @@ validate_string_is_12bit_int: assertions: exit_code: 42 stderr: - contains: - - Input number (4096) interpreted as decimal - - But number must be less than 4096! + - Input number \(.*\) interpreted as decimal + - But number must be less than 4096! 
validate_flag_filter: - name: valid_FlagFilter_passes inputs: diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml new file mode 100644 index 000000000..a4367d8af --- /dev/null +++ b/data_structures/test/read_group.yaml @@ -0,0 +1,11 @@ +read_group_to_string: + - name: bad_id + inputs: + read_group: + - ID: id, + SM: sample_a + LB: library + assertions: + exit_code: 1 + stdout: + - ID (.*) must not match pattern diff --git a/tools/picard.yaml b/tools/test/picard.yaml similarity index 100% rename from tools/picard.yaml rename to tools/test/picard.yaml diff --git a/tools/samtools.yaml b/tools/test/samtools.yaml similarity index 96% rename from tools/samtools.yaml rename to tools/test/samtools.yaml index 2f75fdb52..feca44f2d 100644 --- a/tools/samtools.yaml +++ b/tools/test/samtools.yaml @@ -3,7 +3,6 @@ bam_to_fastq: inputs: bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: @@ -36,6 +35,5 @@ bam_to_fastq: - false bam: - Aligned.sortedByCoord.chr9_chr22.bam - - test.bam - test_rnaseq_variant.bam - test.bwa_aln_pe.chrY_chrM.bam From 881fa7aa7a53eaf0c99062337d6244fe93f5d5c9 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:46:42 -0500 Subject: [PATCH 13/47] delete pytest folder --- .../input_json/get_read_groups.json | 3 - .../input_json/read_group_bad_id.json | 18 -- .../input_json/read_group_bad_sample.json | 18 -- .../input_json/read_group_good.json | 18 -- .../input_json/read_group_missing_sample.json | 8 - .../read_group_sample_with_space.json | 18 -- tests/data_structures/test_flag_filter.yaml | 74 -------- tests/data_structures/test_read_group.yaml | 51 ------ tests/tools/input_json/arriba.json | 9 - tests/tools/input_json/bwa_aln.json | 5 - tests/tools/input_json/bwa_aln_pe.json | 6 - tests/tools/input_json/bwa_mem.json | 5 - tests/tools/input_json/calc_tpm.json | 4 - tests/tools/input_json/gatk4_apply_bqsr.json | 5 - .../input_json/gatk4_base_recalibrator.json | 15 -- .../input_json/gatk4_haplotype_caller.json | 10 -- .../input_json/gatk4_split_n_cigar_reads.json | 8 - .../input_json/gatk4_variant_filtration.json | 7 - tests/tools/input_json/kraken_build_db.json | 6 - .../kraken_create_library_from_fastas.json | 5 - tests/tools/input_json/multiqc.json | 6 - tests/tools/input_json/multiqc_empty.json | 4 - .../tools/input_json/ngsderive_encoding.json | 6 - .../input_json/picard_merge_sam_files.json | 7 - tests/tools/input_json/picard_merge_vcfs.json | 11 -- tests/tools/input_json/sambamba_merge.json | 7 - tests/tools/input_json/samtools_merge.json | 7 - .../tools/input_json/samtools_subsample.json | 4 - .../input_json/star_alignment_PE_1_pair.json | 13 -- .../input_json/star_alignment_PE_2_pairs.json | 16 -- tests/tools/input_json/star_alignment_SE.json | 10 -- .../input_json/util_add_to_bam_header.json | 4 - tests/tools/input_json/util_download.json | 6 - tests/tools/input_json/util_split_string.json | 4 - tests/tools/test_arriba.yaml | 40 ----- tests/tools/test_bwa.yaml | 40 ----- tests/tools/test_deeptools.yaml | 9 - tests/tools/test_fastp.yaml | 12 -- tests/tools/test_fastqc.yaml | 10 -- tests/tools/test_fq.yaml | 27 --- tests/tools/test_gatk4.yaml | 65 ------- tests/tools/test_htseq.yaml | 19 -- tests/tools/test_kraken2.yaml | 55 ------ tests/tools/test_librarian.yaml | 10 -- tests/tools/test_md5sum.yaml | 11 -- tests/tools/test_mosdepth.yaml | 10 -- tests/tools/test_multiqc.yaml | 20 --- tests/tools/test_ngsderive.yaml | 68 -------- 
tests/tools/test_picard.yaml | 163 ------------------ tests/tools/test_qualimap.yaml | 11 -- tests/tools/test_sambamba.yaml | 51 ------ tests/tools/test_samtools.py | 53 ------ tests/tools/test_samtools.yaml | 125 -------------- tests/tools/test_star.yaml | 43 ----- tests/tools/test_util.yaml | 94 ---------- .../_test_methylation-preprocess.yaml | 15 -- tests/workflows/input_json/combine_data.json | 7 - .../input_json/dnaseq-standard-fastq.json | 18 -- .../workflows/input_json/dnaseq-standard.json | 4 - tests/workflows/input_json/filter_probes.json | 4 - tests/workflows/input_json/generate_umap.json | 3 - tests/workflows/input_json/plot_umap.json | 3 - .../input_json/process_raw_idats.json | 6 - tests/workflows/input_json/qc-standard.json | 10 -- .../input_json/rnaseq-standard-fastq.json | 21 --- .../workflows/input_json/rnaseq-standard.json | 6 - .../input_json/rnaseq-variant-calling.json | 16 -- tests/workflows/test_methylation-cohort.yaml | 40 ----- 68 files changed, 1487 deletions(-) delete mode 100644 tests/data_structures/input_json/get_read_groups.json delete mode 100644 tests/data_structures/input_json/read_group_bad_id.json delete mode 100644 tests/data_structures/input_json/read_group_bad_sample.json delete mode 100644 tests/data_structures/input_json/read_group_good.json delete mode 100644 tests/data_structures/input_json/read_group_missing_sample.json delete mode 100644 tests/data_structures/input_json/read_group_sample_with_space.json delete mode 100644 tests/data_structures/test_flag_filter.yaml delete mode 100644 tests/data_structures/test_read_group.yaml delete mode 100644 tests/tools/input_json/arriba.json delete mode 100644 tests/tools/input_json/bwa_aln.json delete mode 100644 tests/tools/input_json/bwa_aln_pe.json delete mode 100644 tests/tools/input_json/bwa_mem.json delete mode 100644 tests/tools/input_json/calc_tpm.json delete mode 100644 tests/tools/input_json/gatk4_apply_bqsr.json delete mode 100644 tests/tools/input_json/gatk4_base_recalibrator.json delete mode 100644 tests/tools/input_json/gatk4_haplotype_caller.json delete mode 100644 tests/tools/input_json/gatk4_split_n_cigar_reads.json delete mode 100644 tests/tools/input_json/gatk4_variant_filtration.json delete mode 100644 tests/tools/input_json/kraken_build_db.json delete mode 100644 tests/tools/input_json/kraken_create_library_from_fastas.json delete mode 100644 tests/tools/input_json/multiqc.json delete mode 100644 tests/tools/input_json/multiqc_empty.json delete mode 100644 tests/tools/input_json/ngsderive_encoding.json delete mode 100644 tests/tools/input_json/picard_merge_sam_files.json delete mode 100644 tests/tools/input_json/picard_merge_vcfs.json delete mode 100644 tests/tools/input_json/sambamba_merge.json delete mode 100644 tests/tools/input_json/samtools_merge.json delete mode 100644 tests/tools/input_json/samtools_subsample.json delete mode 100644 tests/tools/input_json/star_alignment_PE_1_pair.json delete mode 100644 tests/tools/input_json/star_alignment_PE_2_pairs.json delete mode 100644 tests/tools/input_json/star_alignment_SE.json delete mode 100644 tests/tools/input_json/util_add_to_bam_header.json delete mode 100644 tests/tools/input_json/util_download.json delete mode 100644 tests/tools/input_json/util_split_string.json delete mode 100644 tests/tools/test_arriba.yaml delete mode 100644 tests/tools/test_bwa.yaml delete mode 100644 tests/tools/test_deeptools.yaml delete mode 100644 tests/tools/test_fastp.yaml delete mode 100644 tests/tools/test_fastqc.yaml delete mode 100644 
tests/tools/test_fq.yaml delete mode 100644 tests/tools/test_gatk4.yaml delete mode 100644 tests/tools/test_htseq.yaml delete mode 100644 tests/tools/test_kraken2.yaml delete mode 100644 tests/tools/test_librarian.yaml delete mode 100644 tests/tools/test_md5sum.yaml delete mode 100644 tests/tools/test_mosdepth.yaml delete mode 100644 tests/tools/test_multiqc.yaml delete mode 100644 tests/tools/test_ngsderive.yaml delete mode 100644 tests/tools/test_picard.yaml delete mode 100644 tests/tools/test_qualimap.yaml delete mode 100644 tests/tools/test_sambamba.yaml delete mode 100644 tests/tools/test_samtools.py delete mode 100644 tests/tools/test_samtools.yaml delete mode 100644 tests/tools/test_star.yaml delete mode 100644 tests/tools/test_util.yaml delete mode 100644 tests/workflows/_test_methylation-preprocess.yaml delete mode 100644 tests/workflows/input_json/combine_data.json delete mode 100644 tests/workflows/input_json/dnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/dnaseq-standard.json delete mode 100644 tests/workflows/input_json/filter_probes.json delete mode 100644 tests/workflows/input_json/generate_umap.json delete mode 100644 tests/workflows/input_json/plot_umap.json delete mode 100644 tests/workflows/input_json/process_raw_idats.json delete mode 100644 tests/workflows/input_json/qc-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-standard-fastq.json delete mode 100644 tests/workflows/input_json/rnaseq-standard.json delete mode 100644 tests/workflows/input_json/rnaseq-variant-calling.json delete mode 100644 tests/workflows/test_methylation-cohort.yaml diff --git a/tests/data_structures/input_json/get_read_groups.json b/tests/data_structures/input_json/get_read_groups.json deleted file mode 100644 index 9b3f1bbc7..000000000 --- a/tests/data_structures/input_json/get_read_groups.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "get_read_groups.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam" -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_id.json b/tests/data_structures/input_json/read_group_bad_id.json deleted file mode 100644 index 41d05152c..000000000 --- a/tests/data_structures/input_json/read_group_bad_id.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id", - "SM": "sample_a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_bad_sample.json b/tests/data_structures/input_json/read_group_bad_sample.json deleted file mode 100644 index 452d17b8d..000000000 --- a/tests/data_structures/input_json/read_group_bad_sample.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "sample1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_good.json b/tests/data_structures/input_json/read_group_good.json deleted file mode 100644 index 81753f65d..000000000 --- a/tests/data_structures/input_json/read_group_good.json +++ /dev/null @@ -1,18 
+0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "R123", - "SM": "IPSC-1", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_missing_sample.json b/tests/data_structures/input_json/read_group_missing_sample.json deleted file mode 100644 index 8a23be66a..000000000 --- a/tests/data_structures/input_json/read_group_missing_sample.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "read_group_to_string.read_group":{ - "ID": "id" - }, - "read_group_to_string.required_fields": [ - "SM" - ] -} \ No newline at end of file diff --git a/tests/data_structures/input_json/read_group_sample_with_space.json b/tests/data_structures/input_json/read_group_sample_with_space.json deleted file mode 100644 index f0c7d88d0..000000000 --- a/tests/data_structures/input_json/read_group_sample_with_space.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "read_group_to_string.read_group": { - "ID": "myID", - "SM": "sample a", - "LB": "library", - "BC": "barcode", - "PU": "platform_unit", - "PL": "ILLUMINA", - "CN": "center_name", - "DT": "date", - "DS": "description", - "PI": 1, - "PG": "program_group", - "PM": "platform_model", - "FO": "ACMG", - "KS": "key_sequence" - } -} \ No newline at end of file diff --git a/tests/data_structures/test_flag_filter.yaml b/tests/data_structures/test_flag_filter.yaml deleted file mode 100644 index 20a1fb4d5..000000000 --- a/tests/data_structures/test_flag_filter.yaml +++ /dev/null @@ -1,74 +0,0 @@ -- name: flag_filter_0x900 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x900" - -- name: flag_filter_5 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="5" - -- name: flag_filter_01 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="01" - -- name: flag_filter_0x1000 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="0x1000" - exit_code: 1 - stderr: - contains: - - "Input number (0x1000) is invalid" - -- name: flag_filter_neg1 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="-1" - exit_code: 1 - stderr: - contains: - - "Input number (-1) is invalid" - -- name: flag_filter_4096 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4096" - exit_code: 1 - stderr: - contains: - - "Input number (4096) interpreted as decimal" - - "But number must be less than 4096!" 
- -- name: flag_filter_4095 - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="4095" - -- name: flag_filter_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="string" - exit_code: 1 - stderr: - contains: - - "Input number (string) is invalid" - -- name: flag_filter_empty_string - tags: - - flag_filter - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_string_is_12bit_int data_structures/flag_filter.wdl number="" - exit_code: 1 - stderr: - contains: - - "Input number () is invalid" diff --git a/tests/data_structures/test_read_group.yaml b/tests/data_structures/test_read_group.yaml deleted file mode 100644 index 41d09e543..000000000 --- a/tests/data_structures/test_read_group.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: read_group_bad_id - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_id.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "ID (id) must not match pattern" - -- name: read_group_bad_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_bad_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: read_group_good - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_good.json data_structures/read_group.wdl - -- name: read_group_missing_sample - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_missing_sample.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM is required" - -- name: read_group_sample_with_space - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -w read_group_to_string -i tests/data_structures/input_json/read_group_sample_with_space.json data_structures/read_group.wdl - exit_code: 1 - stderr: - contains: - - "SM must not match pattern" - -- name: get_read_groups - tags: - - read_group - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t get_read_groups -i tests/data_structures/input_json/get_read_groups.json data_structures/read_group.wdl \ No newline at end of file diff --git a/tests/tools/input_json/arriba.json b/tests/tools/input_json/arriba.json deleted file mode 100644 index 0721bbd93..000000000 --- a/tests/tools/input_json/arriba.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "arriba.bam": "../../input/Aligned.sortedByCoord.chr9_chr22.bam", - "arriba.gtf": "../../input/gencode.v31.chr9_chr22.gtf.gz", - "arriba.reference_fasta_gz": "../../input/GRCh38.chr9_chr22.fa.gz", - "arriba.disable_filters": [ - "blacklist" - ], - "arriba.prefix": "fusions" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln.json b/tests/tools/input_json/bwa_aln.json deleted file mode 100644 index 3b59db584..000000000 --- a/tests/tools/input_json/bwa_aln.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_aln.fastq": "../../input/test_R1.fq.gz", - "bwa_aln.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln.bwa_db_tar_gz": 
"../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_aln_pe.json b/tests/tools/input_json/bwa_aln_pe.json deleted file mode 100644 index f878cb118..000000000 --- a/tests/tools/input_json/bwa_aln_pe.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "bwa_aln_pe.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_aln_pe.read_two_fastq_gz": "../../input/test_R2.fq.gz", - "bwa_aln_pe.read_group": "@RG\\tID:test\\tSM:test", - "bwa_aln_pe.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/bwa_mem.json b/tests/tools/input_json/bwa_mem.json deleted file mode 100644 index 4563e2b8b..000000000 --- a/tests/tools/input_json/bwa_mem.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "bwa_mem.read_one_fastq_gz": "../../input/test_R1.fq.gz", - "bwa_mem.read_group": "@RG\\tID:test\\tSM:test", - "bwa_mem.bwa_db_tar_gz": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/calc_tpm.json b/tests/tools/input_json/calc_tpm.json deleted file mode 100644 index d4c9fc03f..000000000 --- a/tests/tools/input_json/calc_tpm.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "calc_tpm.counts": "../../input/test.bwa_aln_pe.chrY_chrM.feature-counts.txt", - "calc_tpm.feature_lengths": "../../input/gencode.v31.chrY_chrM.genelengths.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_apply_bqsr.json b/tests/tools/input_json/gatk4_apply_bqsr.json deleted file mode 100644 index d73c2a3eb..000000000 --- a/tests/tools/input_json/gatk4_apply_bqsr.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "apply_bqsr.bam": "../../input/test_rnaseq_variant.bam", - "apply_bqsr.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "apply_bqsr.recalibration_report": "../../input/test_rnaseq_variant.recal.txt" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_base_recalibrator.json b/tests/tools/input_json/gatk4_base_recalibrator.json deleted file mode 100644 index e63e309b2..000000000 --- a/tests/tools/input_json/gatk4_base_recalibrator.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "base_recalibrator.bam": "../../input/test_rnaseq_variant.bam", - "base_recalibrator.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "base_recalibrator.fasta": "../../input/GRCh38.chr1_chr19.fa", - "base_recalibrator.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "base_recalibrator.dict": "../../input/GRCh38.chr1_chr19.dict", - "base_recalibrator.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "base_recalibrator.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "base_recalibrator.known_indels_sites_vcfs": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "base_recalibrator.known_indels_sites_indices": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_haplotype_caller.json b/tests/tools/input_json/gatk4_haplotype_caller.json deleted file mode 100644 index 653fb5714..000000000 --- a/tests/tools/input_json/gatk4_haplotype_caller.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "haplotype_caller.bam": "../../input/test_rnaseq_variant.bam", - "haplotype_caller.bam_index": "../../input/test_rnaseq_variant.bam.bai", - "haplotype_caller.fasta": "../../input/GRCh38.chr1_chr19.fa", - "haplotype_caller.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "haplotype_caller.dict": 
"../../input/GRCh38.chr1_chr19.dict", - "haplotype_caller.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "haplotype_caller.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx", - "haplotype_caller.interval_list": "../../input/chr1_chr19.interval_list" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_split_n_cigar_reads.json b/tests/tools/input_json/gatk4_split_n_cigar_reads.json deleted file mode 100644 index e547714cf..000000000 --- a/tests/tools/input_json/gatk4_split_n_cigar_reads.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "split_n_cigar_reads.bam": "../../input/test.bam", - "split_n_cigar_reads.bam_index": "../../input/test.bam.bai", - "split_n_cigar_reads.fasta": "../../input/GRCh38.chr1_chr19.fa", - "split_n_cigar_reads.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "split_n_cigar_reads.dict": "../../input/GRCh38.chr1_chr19.dict", - "split_n_cigar_reads.prefix": "split" -} \ No newline at end of file diff --git a/tests/tools/input_json/gatk4_variant_filtration.json b/tests/tools/input_json/gatk4_variant_filtration.json deleted file mode 100644 index ba6d698da..000000000 --- a/tests/tools/input_json/gatk4_variant_filtration.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "variant_filtration.vcf": "../../input/test1.vcf.gz", - "variant_filtration.vcf_index": "../../input/test1.vcf.gz.tbi", - "variant_filtration.fasta": "../../input/GRCh38.chr1_chr19.fa", - "variant_filtration.fasta_index": "../../input/GRCh38.chr1_chr19.fa.fai", - "variant_filtration.dict": "../../input/GRCh38.chr1_chr19.dict" -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_build_db.json b/tests/tools/input_json/kraken_build_db.json deleted file mode 100644 index d3251ef77..000000000 --- a/tests/tools/input_json/kraken_build_db.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "build_db.tarballs": [ - "../../input/kraken2_taxonomy.tar.gz", - "../../input/kraken2_C_elegans_library.tar.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/kraken_create_library_from_fastas.json b/tests/tools/input_json/kraken_create_library_from_fastas.json deleted file mode 100644 index 1d15046d6..000000000 --- a/tests/tools/input_json/kraken_create_library_from_fastas.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "create_library_from_fastas.fastas_gz": [ - "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc.json b/tests/tools/input_json/multiqc.json deleted file mode 100644 index 556d7111f..000000000 --- a/tests/tools/input_json/multiqc.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "multiqc.files": [ - "../../input/test.bwa_aln_pe.chrY_chrM.readlength.txt" - ], - "multiqc.report_name": "test.bwa_aln_pe.chrY_chrM.multiqc" -} \ No newline at end of file diff --git a/tests/tools/input_json/multiqc_empty.json b/tests/tools/input_json/multiqc_empty.json deleted file mode 100644 index f2fbc2d6c..000000000 --- a/tests/tools/input_json/multiqc_empty.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "multiqc.files": [], - "multiqc.report_name": "empty" -} \ No newline at end of file diff --git a/tests/tools/input_json/ngsderive_encoding.json b/tests/tools/input_json/ngsderive_encoding.json deleted file mode 100644 index 3b4d25fd5..000000000 --- a/tests/tools/input_json/ngsderive_encoding.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "encoding.ngs_files": [ - 
"../../input/test.bwa_aln_pe.chrY_chrM.bam" - ], - "encoding.outfile_name": "test.bwa_aln_pe.chrY_chrM.encoding.tsv" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_sam_files.json b/tests/tools/input_json/picard_merge_sam_files.json deleted file mode 100644 index 59448d62d..000000000 --- a/tests/tools/input_json/picard_merge_sam_files.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge_sam_files.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge_sam_files.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/picard_merge_vcfs.json b/tests/tools/input_json/picard_merge_vcfs.json deleted file mode 100644 index 729faa020..000000000 --- a/tests/tools/input_json/picard_merge_vcfs.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "merge_vcfs.vcfs": [ - "../../input/test1.vcf.gz", - "../../input/test2.vcf.gz" - ], - "merge_vcfs.vcfs_indexes": [ - "../../input/test1.vcf.gz.tbi", - "../../input/test2.vcf.gz.tbi" - ], - "merge_vcfs.output_vcf_name": "test.vcf.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/sambamba_merge.json b/tests/tools/input_json/sambamba_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/sambamba_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_merge.json b/tests/tools/input_json/samtools_merge.json deleted file mode 100644 index 2b8c66b13..000000000 --- a/tests/tools/input_json/samtools_merge.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "merge.bams": [ - "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "../../input/test.PE.2_RGs.Aligned.out.sorted.bam" - ], - "merge.prefix": "test.merged" -} \ No newline at end of file diff --git a/tests/tools/input_json/samtools_subsample.json b/tests/tools/input_json/samtools_subsample.json deleted file mode 100644 index 05e5fe4b5..000000000 --- a/tests/tools/input_json/samtools_subsample.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "subsample.bam": "https://github.com/stjude/CICERO/raw/master/test/data/input/test.bam", - "subsample.desired_reads": 100 -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_1_pair.json b/tests/tools/input_json/star_alignment_PE_1_pair.json deleted file mode 100644 index befd948bd..000000000 --- a/tests/tools/input_json/star_alignment_PE_1_pair.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_PE_2_pairs.json b/tests/tools/input_json/star_alignment_PE_2_pairs.json deleted file mode 100644 index 45e479445..000000000 --- a/tests/tools/input_json/star_alignment_PE_2_pairs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz", - "../../input/random10k.r1.fq.gz" - ], - "alignment.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz", - "../../input/random10k.r2.fq.gz" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz", - "alignment.prefix": 
"test", - "alignment.read_groups": [ - "ID:test SM:test PL:ILLUMINA", - "ID:random" - ] -} \ No newline at end of file diff --git a/tests/tools/input_json/star_alignment_SE.json b/tests/tools/input_json/star_alignment_SE.json deleted file mode 100644 index c30d29281..000000000 --- a/tests/tools/input_json/star_alignment_SE.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "alignment.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "alignment.prefix": "test", - "alignment.read_groups": [ - "ID:foo" - ], - "alignment.star_db_tar_gz": "../../input/star_db.chrY_chrM.tar.gz" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_add_to_bam_header.json b/tests/tools/input_json/util_add_to_bam_header.json deleted file mode 100644 index 71f5e7960..000000000 --- a/tests/tools/input_json/util_add_to_bam_header.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "add_to_bam_header.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "add_to_bam_header.additional_header": "@RG\tID:3" -} \ No newline at end of file diff --git a/tests/tools/input_json/util_download.json b/tests/tools/input_json/util_download.json deleted file mode 100644 index 16dde86b1..000000000 --- a/tests/tools/input_json/util_download.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "download.url": "https://raw.githubusercontent.com/stjudecloud/workflows/main/LICENSE.md", - "download.outfile_name": "license.txt", - "download.md5sum": "cf3575bd84ab3151c7e9700b5f1a9746", - "download.disk_size_gb": 1 -} \ No newline at end of file diff --git a/tests/tools/input_json/util_split_string.json b/tests/tools/input_json/util_split_string.json deleted file mode 100644 index 68e3d245b..000000000 --- a/tests/tools/input_json/util_split_string.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "split_string.string": "rg1 , rg2", - "split_string.delimiter": " , " -} \ No newline at end of file diff --git a/tests/tools/test_arriba.yaml b/tests/tools/test_arriba.yaml deleted file mode 100644 index 883a9a9c7..000000000 --- a/tests/tools/test_arriba.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: arriba - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba -i tests/tools/input_json/arriba.json tools/arriba.wdl - files: - - path: output/outputs.json - contains: - - fusions.tsv - - fusions.discarded.tsv - -- name: arriba_tsv_to_vcf - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_tsv_to_vcf tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" reference_fasta="tests/input/GRCh38.chr9_chr22.fa.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.vcf - -- name: arriba_extract_fusion_supporting_alignments - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_extract_fusion_supporting_alignments tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" bam="tests/input/Aligned.sortedByCoord.chr9_chr22.bam" bam_index="tests/input/Aligned.sortedByCoord.chr9_chr22.bam.bai" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions_1.bam - -- name: arriba_annotate_exon_numbers - tags: - - arriba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t arriba_annotate_exon_numbers tools/arriba.wdl fusions="tests/input/fusions.BCR_ABL1.tsv" gtf="tests/input/gencode.v31.chr9_chr22.gtf.gz" prefix="fusions" - files: - - path: output/outputs.json - contains: - - fusions.annotated.tsv diff --git a/tests/tools/test_bwa.yaml b/tests/tools/test_bwa.yaml deleted file mode 100644 index 
6c38821ef..000000000 --- a/tests/tools/test_bwa.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: bwa_aln - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln -i tests/tools/input_json/bwa_aln.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_aln_pe - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_aln_pe -i tests/tools/input_json/bwa_aln_pe.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: bwa_mem - tags: - - bwa - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bwa_mem -i tests/tools/input_json/bwa_mem.json tools/bwa.wdl - files: - - path: output/outputs.json - contains: - - test.bam - -- name: build_bwa_db - tags: - - bwa - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_bwa_db tools/bwa.wdl reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - bwa_db.tar.gz diff --git a/tests/tools/test_deeptools.yaml b/tests/tools/test_deeptools.yaml deleted file mode 100644 index 64c671edf..000000000 --- a/tests/tools/test_deeptools.yaml +++ /dev/null @@ -1,9 +0,0 @@ -- name: deeptools_bam_coverage - tags: - - deeptools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_coverage tools/deeptools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bw diff --git a/tests/tools/test_fastp.yaml b/tests/tools/test_fastp.yaml deleted file mode 100644 index 006db1087..000000000 --- a/tests/tools/test_fastp.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- name: fastp - tags: - - fastp - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastp tools/fastp.wdl read_one_fastq="tests/input/test_R1.fq.gz" read_two_fastq="tests/input/test_R2.fq.gz" - files: - - path: output/outputs.json - contains: - - test.trimmed.R1.fastq.gz - - test.trimmed.R2.fastq.gz - - test.trimmed.fastp.html - - test.trimmed.fastp.json \ No newline at end of file diff --git a/tests/tools/test_fastqc.yaml b/tests/tools/test_fastqc.yaml deleted file mode 100644 index 2be254308..000000000 --- a/tests/tools/test_fastqc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: fastqc - tags: - - fastqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fastqc tools/fastqc.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM_fastqc.zip - - test.bwa_aln_pe.chrY_chrM.fastqc_results.tar.gz diff --git a/tests/tools/test_fq.yaml b/tests/tools/test_fq.yaml deleted file mode 100644 index dae76f1bc..000000000 --- a/tests/tools/test_fq.yaml +++ /dev/null @@ -1,27 +0,0 @@ -- name: fqlint - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t fqlint tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz - -- name: subsample_fq - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz record_count=1000 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz - -- name: subsample_fq_percentage - tags: - - fq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample tools/fq.wdl 
read_one_fastq=tests/input/test_R1.fq.gz read_two_fastq=tests/input/test_R2.fq.gz probability=0.01 - files: - - path: output/outputs.json - contains: - - test.R1.subsampled.fastq.gz - - test.R2.subsampled.fastq.gz diff --git a/tests/tools/test_gatk4.yaml b/tests/tools/test_gatk4.yaml deleted file mode 100644 index 200a7f33e..000000000 --- a/tests/tools/test_gatk4.yaml +++ /dev/null @@ -1,65 +0,0 @@ -- name: gatk4_split_n_cigar_reads - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_n_cigar_reads -i tests/tools/input_json/gatk4_split_n_cigar_reads.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - split.bam - - split.bam.bai - - split.bam.md5 - -- name: gatk4_base_recalibrator - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t base_recalibrator -i tests/tools/input_json/gatk4_base_recalibrator.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.recal.txt - -- name: gatk4_apply_bqsr - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t apply_bqsr -i tests/tools/input_json/gatk4_apply_bqsr.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.bqsr.bam - - test_rnaseq_variant.bqsr.bam.bai - -- name: gatk4_haplotype_caller - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t haplotype_caller -i tests/tools/input_json/gatk4_haplotype_caller.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test_rnaseq_variant.vcf.gz - -- name: gatk4_variant_filtration - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t variant_filtration -i tests/tools/input_json/gatk4_variant_filtration.json tools/gatk4.wdl - files: - - path: output/outputs.json - contains: - - test1.filtered.vcf.gz - - test1.filtered.vcf.gz.tbi - -- name: gatk4_mark_duplicates_spark - tags: - - gatk4 - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates_spark tools/gatk4.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt \ No newline at end of file diff --git a/tests/tools/test_htseq.yaml b/tests/tools/test_htseq.yaml deleted file mode 100644 index f35b7adbd..000000000 --- a/tests/tools/test_htseq.yaml +++ /dev/null @@ -1,19 +0,0 @@ -- name: htseq_count - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t count tools/htseq.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" strandedness="no" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.feature-counts.txt - -- name: calc_tpm - tags: - - htseq - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_tpm -i tests/tools/input_json/calc_tpm.json tools/htseq.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.TPM.txt diff --git a/tests/tools/test_kraken2.yaml b/tests/tools/test_kraken2.yaml deleted file mode 100644 index 22564eaf6..000000000 --- a/tests/tools/test_kraken2.yaml +++ /dev/null @@ -1,55 +0,0 @@ -- name: download_taxonomy - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_taxonomy tools/kraken2.wdl - files: - - path: 
output/outputs.json - contains: - - kraken2_taxonomy.tar.gz - -- name: download_library - tags: - - kraken - - reference - - slow - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t download_library tools/kraken2.wdl library_name='fungi' - files: - - path: output/outputs.json - contains: - - kraken2_fungi_library.tar.gz - -- name: create_library_from_fastas - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_library_from_fastas -i tests/tools/input_json/kraken_create_library_from_fastas.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_custom_library.tar.gz - -- name: build_db - tags: - - kraken - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_db -i tests/tools/input_json/kraken_build_db.json tools/kraken2.wdl - files: - - path: output/outputs.json - contains: - - kraken2_db.tar.gz - -- name: kraken - tags: - - kraken - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t kraken tools/kraken2.wdl read_one_fastq_gz="tests/input/test_R1.fq.gz" read_two_fastq_gz="tests/input/test_R2.fq.gz" db="tests/input/kraken2_db.mini.tar.gz" - files: - - path: output/outputs.json - contains: - - test.kraken2.txt diff --git a/tests/tools/test_librarian.yaml b/tests/tools/test_librarian.yaml deleted file mode 100644 index a0033514e..000000000 --- a/tests/tools/test_librarian.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: librarian - tags: - - librarian - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t librarian tools/librarian.wdl read_one_fastq="tests/input/test_R1.fq.gz" - files: - - path: output/outputs.json - contains: - - test.librarian.tar.gz - - librarian_heatmap.txt diff --git a/tests/tools/test_md5sum.yaml b/tests/tools/test_md5sum.yaml deleted file mode 100644 index a1199b897..000000000 --- a/tests/tools/test_md5sum.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: compute_checksum - tags: - - md5sum - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compute_checksum tools/md5sum.wdl file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.md5 - # contains: - # - "77fa2f59b0083202c73b0c80b60b24f6" diff --git a/tests/tools/test_mosdepth.yaml b/tests/tools/test_mosdepth.yaml deleted file mode 100644 index e0b33e21f..000000000 --- a/tests/tools/test_mosdepth.yaml +++ /dev/null @@ -1,10 +0,0 @@ -- name: mosdepth_coverage - tags: - - mosdepth - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t coverage tools/mosdepth.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.mosdepth.summary.txt - - test.bwa_aln_pe.chrY_chrM.mosdepth.global.dist.txt diff --git a/tests/tools/test_multiqc.yaml b/tests/tools/test_multiqc.yaml deleted file mode 100644 index c2537df7b..000000000 --- a/tests/tools/test_multiqc.yaml +++ /dev/null @@ -1,20 +0,0 @@ -- name: multiqc - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc.json tools/multiqc.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.multiqc.html - -- name: multiqc_empty - tags: - - multiqc - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t multiqc -i tests/tools/input_json/multiqc_empty.json tools/multiqc.wdl - exit_code: 1 - stderr: - contains: 
- - "No analysis results found" - - "MultiQC didn't find any valid files" \ No newline at end of file diff --git a/tests/tools/test_ngsderive.yaml b/tests/tools/test_ngsderive.yaml deleted file mode 100644 index b94534d5e..000000000 --- a/tests/tools/test_ngsderive.yaml +++ /dev/null @@ -1,68 +0,0 @@ -- name: strandedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t strandedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.strandedness.tsv - - "Unstranded" - -- name: instrument - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t instrument tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.instrument.tsv - - "multiple instruments" - # - "unknown confidence" - -- name: read_length - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t read_length tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.readlength.tsv - # contains: - # - "150=20000" - -- name: encoding - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t encoding -i tests/tools/input_json/ngsderive_encoding.json tools/ngsderive.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.encoding.tsv - # contains: - # - "ASCII range: 74-74" - # - "Illumina 1.3" - -- name: junction_annotation - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t junction_annotation tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" bam_index="tests/input/test.bwa_aln_pe.chrY_chrM.bam.bai" gene_model="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.junction_summary.tsv - - test.bwa_aln_pe.chrY_chrM.junctions.tsv.gz - -- name: endedness - tags: - - ngsderive - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t endedness tools/ngsderive.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.endedness.tsv diff --git a/tests/tools/test_picard.yaml b/tests/tools/test_picard.yaml deleted file mode 100644 index ca99cab36..000000000 --- a/tests/tools/test_picard.yaml +++ /dev/null @@ -1,163 +0,0 @@ -- name: picard_mark_duplicates - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t mark_duplicates tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.bai - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.bam.md5 - - test.bwa_aln_pe.chrY_chrM.MarkDuplicates.metrics.txt - -- name: picard_validate_bam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t validate_bam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.ValidateSamFile.txt - -- name: picard_bam_to_fastq - tags: - - picard - command: >- - 
./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: picard_sort - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_sort_queryname - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" memory_gb=16 sort_order="queryname" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - must_not_contain: - - test.bwa_aln_pe.chrY_chrM.sorted.bam.bai - -- name: picard_merge_sam_files - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_sam_files -i tests/tools/input_json/picard_merge_sam_files.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: picard_clean_sam - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t clean_sam tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.cleaned.bam - -- name: picard_collect_wgs_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_wgs_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta="tests/input/GRCh38.chrY_chrM.fa" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectWgsMetrics.txt - -- name: picard_collect_alignment_summary_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_alignment_summary_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectAlignmentSummaryMetrics.pdf - -- name: picard_collect_gc_bias_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_gc_bias_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" reference_fasta=tests/input/GRCh38.chrY_chrM.fa - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.summary.txt - - test.bwa_aln_pe.chrY_chrM.CollectGcBiasMetrics.pdf - -- name: picard_collect_insert_size_metrics - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collect_insert_size_metrics tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.txt - - test.bwa_aln_pe.chrY_chrM.CollectInsertSizeMetrics.pdf - -- name: picard_quality_score_distribution - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quality_score_distribution tools/picard.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.txt - - 
test.bwa_aln_pe.chrY_chrM.QualityScoreDistribution.pdf - -- name: picard_merge_vcfs - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge_vcfs -i tests/tools/input_json/picard_merge_vcfs.json tools/picard.wdl - files: - - path: output/outputs.json - contains: - - test.vcf.gz - -- name: picard_scatter_interval_list - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t scatter_interval_list tools/picard.wdl interval_list="tests/input/wgs_calling_regions.hg38.interval_list" scatter_count=3 - files: - - path: output/outputs.json - contains: - - 1scattered.interval_list - - 2scattered.interval_list - - 3scattered.interval_list - -- name: picard_create_sequence_dictionary - tags: - - picard - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t create_sequence_dictionary tools/picard.wdl fasta="tests/input/GRCh38.chrY_chrM.fa" outfile_name="GRCh38.chrY_chrM.dict" - files: - - path: output/outputs.json - contains: - - GRCh38.chrY_chrM.dict \ No newline at end of file diff --git a/tests/tools/test_qualimap.yaml b/tests/tools/test_qualimap.yaml deleted file mode 100644 index 405b31517..000000000 --- a/tests/tools/test_qualimap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -- name: qualimap_rnaseq - tags: - - qualimap - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t rnaseq tools/qualimap.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.qualimap_rnaseq_results.tar.gz - - rnaseq_qc_results.txt - - coverage_profile_along_genes_(total).txt diff --git a/tests/tools/test_sambamba.yaml b/tests/tools/test_sambamba.yaml deleted file mode 100644 index 4a1464a2b..000000000 --- a/tests/tools/test_sambamba.yaml +++ /dev/null @@ -1,51 +0,0 @@ -- name: sambamba_index - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: sambamba_merge - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/sambamba_merge.json tools/sambamba.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: sambamba_sort - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t sort tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.sorted.bam - -- name: sambamba_flagstat - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - -- name: sambamba_markdup - tags: - - sambamba - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t markdup tools/sambamba.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.markdup.bam - - test.bwa_aln_pe.chrY_chrM.markdup.bam.bai - - test.bwa_aln_pe.chrY_chrM.markdup_log.txt \ No newline at end of file diff --git a/tests/tools/test_samtools.py b/tests/tools/test_samtools.py deleted file mode 100644 index 272ed7b76..000000000 --- a/tests/tools/test_samtools.py +++ /dev/null @@ -1,53 +0,0 @@ 
-"""TODO: rewrite this so it is runner agnostic""" - -# import pytest -# import pathlib -# from collections import OrderedDict - -# import pysam -# import fastq - - -# @pytest.mark.workflow('samtools_split') -# def test_samtools_split(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/0/test.1.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 1 -# assert read_groups[0] == "1" - -# second_bam = pathlib.Path(workflow_dir, 'test-output/out/split_bams/1/test.2.bam') -# second_samfile = pysam.AlignmentFile(second_bam, "rb") -# second_bam_header = OrderedDict((k, v) for k, v in second_samfile.header.items()) -# second_read_groups = [read_group['ID'] for read_group in second_bam_header.get('RG', []) if 'ID' in read_group] -# assert len(second_read_groups) == 1 -# assert second_read_groups[0] == "2" - -# @pytest.mark.workflow('samtools_merge') -# def test_samtools_merge(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/merged_bam/test.bam') -# samfile = pysam.AlignmentFile(bam, "rb") -# bam_header = OrderedDict((k, v) for k, v in samfile.header.items()) -# read_groups = [read_group['ID'] for read_group in bam_header.get('RG', []) if 'ID' in read_group] -# assert len(read_groups) == 2 -# assert read_groups[0] == "test2" -# assert read_groups[1] == "test.bwa_aln_pe.chrY_chrM" - -# @pytest.mark.workflow('samtools_collate', 'samtools_collate_to_fastq') -# def test_samtools_collate(workflow_dir): -# bam = pathlib.Path(workflow_dir, 'test-output/out/collated_bam/test.bwa_aln_pe.chrY_chrM.collated.bam') -# samfile = pysam.AlignmentFile(bam, "rb") - -# reads = list(samfile.fetch(until_eof=True)) -# for c in range(0, 100, 2): -# assert reads[c].query_name == reads[c+1].query_name -# assert reads[c].is_read1 != reads[c+1].is_read1 - -# @pytest.mark.workflow('samtools_bam_to_fastq', 'samtools_collate_to_fastq') -# def test_samtools_bam_to_fastq(workflow_dir): -# fq1 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_one_fastq_gz/test.bwa_aln_pe.chrY_chrM.R1.fastq.gz')) -# fq2 = fastq.read(pathlib.Path(workflow_dir, 'test-output/out/read_two_fastq_gz/test.bwa_aln_pe.chrY_chrM.R2.fastq.gz')) - -# for r1, r2 in zip(fq1, fq2): -# assert r1.head.removesuffix("/1") == r2.head.removesuffix("/2") diff --git a/tests/tools/test_samtools.yaml b/tests/tools/test_samtools.yaml deleted file mode 100644 index 4d08c8583..000000000 --- a/tests/tools/test_samtools.yaml +++ /dev/null @@ -1,125 +0,0 @@ -- name: samtools_quickcheck - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t quickcheck tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -- name: samtools_split - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.bam" - files: - - path: output/outputs.json - contains: - - test.1.bam - - test.2.bam - -- name: samtools_split_unaccounted - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.unaccounted_read.bam" - exit_code: 1 - stderr: - contains: - - "There are reads present with bad or missing RG tags!" 
- -- name: samtools_split_extra_RG - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split tools/samtools.wdl bam="tests/input/test.extra_RG.bam" - exit_code: 1 - stderr: - contains: - - "No reads are in output BAM test.extra_RG.no_match.bam!" - - "This is likely caused by malformed RG records." - -- name: samtools_flagstat - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t flagstat tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.flagstat.txt - # contains: - # - "20000" - # - "0 + 0 secondary" - # - "20000 + 0 in total (QC-passed reads + QC-failed reads)" - # - "10000 + 0 read1" - # - "10000 + 0 read2" - -- name: samtools_index - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t index tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.bam.bai - -- name: samtools_subsample - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t subsample -i tests/tools/input_json/samtools_subsample.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.sampled.bam - -- name: samtools_merge - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t merge -i tests/tools/input_json/samtools_merge.json tools/samtools.wdl - files: - - path: output/outputs.json - contains: - - test.merged.bam - -- name: samtools_addreplacerg - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t addreplacerg tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" read_group_id="test" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.addreplacerg.bam - -- name: samtools_collate - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t collate tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - -- name: samtools_bam_to_fastq - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t bam_to_fastq tools/samtools.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" retain_collated_bam=true - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.collated.bam - - test.bwa_aln_pe.chrY_chrM.R1.fastq.gz - - test.bwa_aln_pe.chrY_chrM.R2.fastq.gz - -- name: samtools_faidx - tags: - - samtools - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t faidx tools/samtools.wdl fasta="tests/input/test.fa" - files: - - path: output/outputs.json - contains: - - test.fa.fai diff --git a/tests/tools/test_star.yaml b/tests/tools/test_star.yaml deleted file mode 100644 index 159f217e7..000000000 --- a/tests/tools/test_star.yaml +++ /dev/null @@ -1,43 +0,0 @@ -- name: build_star_db - tags: - - star - - reference - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t build_star_db tools/star.wdl reference_fasta=tests/input/GRCh38.chrY_chrM.fa gtf=tests/input/gencode.v31.chrY_chrM.gtf.gz - files: - - path: output/outputs.json - contains: - - star_db.tar.gz - -- name: star_alignment_PE_1_pair - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_1_pair.json tools/star.wdl - files: - - path: 
output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_PE_2_pairs - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_PE_2_pairs.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam - -- name: star_alignment_SE - tags: - - star - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t alignment -i tests/tools/input_json/star_alignment_SE.json tools/star.wdl - files: - - path: output/outputs.json - contains: - - test.Log.final.out - - test.Aligned.out.bam \ No newline at end of file diff --git a/tests/tools/test_util.yaml b/tests/tools/test_util.yaml deleted file mode 100644 index 695da9859..000000000 --- a/tests/tools/test_util.yaml +++ /dev/null @@ -1,94 +0,0 @@ -- name: download - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -i tests/tools/input_json/util_download.json -t download tools/util.wdl - files: - - path: output/outputs.json - contains: - - license.txt - # md5sum: cf3575bd84ab3151c7e9700b5f1a9746 - # contains: - # - "MIT License" - -- name: split_string - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_string -i tests/tools/input_json/util_split_string.json tools/util.wdl - stdout: - contains: - - "rg1" - - "rg2" - must_not_contain: - - "rg1 , rg2" - -- name: calc_feature_lengths - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t calc_feature_lengths tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.genelengths.txt - # contains: - # - "AL954722.1" - -- name: compression_integrity - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t compression_integrity tools/util.wdl bgzipped_file="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - -# TODO: This does not test that the record was properly added to the header. -- name: add_to_bam_header - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t add_to_bam_header -i tests/tools/input_json/util_add_to_bam_header.json tools/util.wdl - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.reheader.bam - -- name: unpack_tarball - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t unpack_tarball tools/util.wdl tarball="tests/input/test.tar.gz" - files: - - path: output/outputs.json - contains: - - test_file_a - - test_file_b - -- name: make_coverage_regions_bed - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t make_coverage_regions_bed tools/util.wdl gtf="tests/input/gencode.v31.chrY_chrM.gtf.gz" feature_type="exon" - files: - - path: output/outputs.json - contains: - - gencode.v31.chrY_chrM.exon.bed - -- name: global_phred_scores - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t global_phred_scores tools/util.wdl bam="tests/input/test.bwa_aln_pe.chrY_chrM.bam" - files: - - path: output/outputs.json - contains: - - test.bwa_aln_pe.chrY_chrM.global_PHRED_scores.tsv - -- name: split_fastq - tags: - - util - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t split_fastq tools/util.wdl fastq="tests/input/test_R1.fq.gz" prefix="test.R1." 
- files: - - path: output/outputs.json - contains: - - test.R1.000000.fastq.gz \ No newline at end of file diff --git a/tests/workflows/_test_methylation-preprocess.yaml b/tests/workflows/_test_methylation-preprocess.yaml deleted file mode 100644 index 3bfe379b8..000000000 --- a/tests/workflows/_test_methylation-preprocess.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: process_raw_idats - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t process_raw_idats -i tests/workflows/input_json/process_raw_idats.json workflows/methylation/methylation-preprocess.wdl - files: - - path: output/outputs.json - contains: - - 201533520001_R03C01.beta_swan_norm_unfiltered.csv - - 201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv - - 201533520001_R03C01.annotation.csv - - 201533520001_R03C01.beta.csv - - 201533520001_R03C01.cn_values.csv - - 201533520001_R03C01.m_values.csv - - 201533520001_R03C01.probeNames.csv \ No newline at end of file diff --git a/tests/workflows/input_json/combine_data.json b/tests/workflows/input_json/combine_data.json deleted file mode 100644 index ec1edd324..000000000 --- a/tests/workflows/input_json/combine_data.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "combine_data.files_to_combine": [ - "../../input/201533520001_R01C01.beta_swan_norm_unfiltered.genomic.csv", - "../../input/201533520001_R03C01.beta_swan_norm_unfiltered.genomic.csv" - ], - "combine_data.combined_file_name": "combined_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard-fastq.json b/tests/workflows/input_json/dnaseq-standard-fastq.json deleted file mode 100644 index f1db97b82..000000000 --- a/tests/workflows/input_json/dnaseq-standard-fastq.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "dnaseq_standard_fastq_experimental.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "dnaseq_standard_fastq_experimental.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "dnaseq_standard_fastq_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/dnaseq-standard.json b/tests/workflows/input_json/dnaseq-standard.json deleted file mode 100644 index 06e8d8cac..000000000 --- a/tests/workflows/input_json/dnaseq-standard.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "dnaseq_standard_experimental.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "dnaseq_standard_experimental.bwa_db": "../../input/GRCh38.chrY_chrM.bwa_db.tar.gz" -} \ No newline at end of file diff --git a/tests/workflows/input_json/filter_probes.json b/tests/workflows/input_json/filter_probes.json deleted file mode 100644 index 8b7d0a024..000000000 --- a/tests/workflows/input_json/filter_probes.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "filter_probes.beta_values": "../../input/combined_beta.csv", - "filter_probes.num_probes": 1000 -} \ No newline at end of file diff --git a/tests/workflows/input_json/generate_umap.json b/tests/workflows/input_json/generate_umap.json deleted file mode 100644 index 670c02298..000000000 --- a/tests/workflows/input_json/generate_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "generate_umap.filtered_beta_values": "../../input/filtered_beta.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/plot_umap.json b/tests/workflows/input_json/plot_umap.json deleted file mode 100644 index 
86444c490..000000000 --- a/tests/workflows/input_json/plot_umap.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "plot_umap.umap": "../../input/umap.csv" -} \ No newline at end of file diff --git a/tests/workflows/input_json/process_raw_idats.json b/tests/workflows/input_json/process_raw_idats.json deleted file mode 100644 index b1dbbbc77..000000000 --- a/tests/workflows/input_json/process_raw_idats.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "process_raw_idats.idats": { - "left": "../../input/201533520001_R03C01_Grn.idat", - "right": "../../input/201533520001_R03C01_Red.idat" - } -} \ No newline at end of file diff --git a/tests/workflows/input_json/qc-standard.json b/tests/workflows/input_json/qc-standard.json deleted file mode 100644 index 0f552166e..000000000 --- a/tests/workflows/input_json/qc-standard.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "quality_check_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "quality_check_standard.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "quality_check_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "quality_check_standard.kraken_db": "../../input/kraken2_db.mini.tar.gz", - "quality_check_standard.rna": true, - "quality_check_standard.coverage_beds": [ - "../../input/gencode.v31.chrY_chrM.gene.bed" - ] -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard-fastq.json b/tests/workflows/input_json/rnaseq-standard-fastq.json deleted file mode 100644 index 452d4c932..000000000 --- a/tests/workflows/input_json/rnaseq-standard-fastq.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "rnaseq_standard_fastq.read_one_fastqs_gz": [ - "../../input/test_R1.fq.gz" - ], - "rnaseq_standard_fastq.read_two_fastqs_gz": [ - "../../input/test_R2.fq.gz" - ], - "rnaseq_standard_fastq.read_groups": [ - { - "ID": "test", - "PI": 150, - "PL": "ILLUMINA", - "SM": "Sample", - "LB": "Sample" - } - ], - "rnaseq_standard_fastq.prefix": "test", - "rnaseq_standard_fastq.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard_fastq.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard_fastq.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-standard.json b/tests/workflows/input_json/rnaseq-standard.json deleted file mode 100644 index a42cbc5a1..000000000 --- a/tests/workflows/input_json/rnaseq-standard.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "rnaseq_standard.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_standard.gtf": "../../input/gencode.v31.chrY_chrM.gtf.gz", - "rnaseq_standard.star_db": "../../input/star_db.chrY_chrM.tar.gz", - "rnaseq_standard.strandedness": "Unstranded" -} \ No newline at end of file diff --git a/tests/workflows/input_json/rnaseq-variant-calling.json b/tests/workflows/input_json/rnaseq-variant-calling.json deleted file mode 100644 index e108c5298..000000000 --- a/tests/workflows/input_json/rnaseq-variant-calling.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "rnaseq_variant_calling.bam": "../../input/test.bwa_aln_pe.chrY_chrM.bam", - "rnaseq_variant_calling.bam_index": "../../input/test.bwa_aln_pe.chrY_chrM.bam.bai", - "rnaseq_variant_calling.fasta": "../../input/GRCh38.chrY_chrM.fa", - "rnaseq_variant_calling.fasta_index": "../../input/GRCh38.chrY_chrM.fa.fai", - "rnaseq_variant_calling.dict": "../../input/GRCh38.chrY_chrM.dict", - "rnaseq_variant_calling.calling_interval_list": "../../input/wgs_calling_regions.hg38.interval_list", - "rnaseq_variant_calling.known_vcfs": [ - 
"../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - ], - "rnaseq_variant_calling.known_vcf_indexes": [ - "../../input/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi" - ], - "rnaseq_variant_calling.dbSNP_vcf": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf", - "rnaseq_variant_calling.dbSNP_vcf_index": "../../input/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx" -} \ No newline at end of file diff --git a/tests/workflows/test_methylation-cohort.yaml b/tests/workflows/test_methylation-cohort.yaml deleted file mode 100644 index f981b75a7..000000000 --- a/tests/workflows/test_methylation-cohort.yaml +++ /dev/null @@ -1,40 +0,0 @@ -- name: combine_data - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t combine_data -i tests/workflows/input_json/combine_data.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - combined_beta.csv - -- name: filter_probes - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t filter_probes -i tests/workflows/input_json/filter_probes.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - filtered.beta.csv - - filtered.probes.csv - -- name: generate_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t generate_umap -i tests/workflows/input_json/generate_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.csv - -- name: plot_umap - tags: - - methylation - command: >- - ./developer_scripts/run_sprocket_or_miniwdl.sh -t plot_umap -i tests/workflows/input_json/plot_umap.json workflows/methylation/methylation-cohort.wdl - files: - - path: output/outputs.json - contains: - - umap.png From d2898f06c9751ed306fba91bfe2e3fe3b6738059 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:05 -0500 Subject: [PATCH 14/47] revise: change validate_read_group logic --- data_structures/read_group.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 151f58ac8..719fdb409 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -156,15 +156,13 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean restrictive = true } # The SAM spec allows any printable ASCII character in header fields. String sam_spec_pattern = "[\\ -~]+" # We have the opinion that is too permissive for ID and SM. - String id_pattern = "id" - String sample_pattern = "sample.?" 
String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", @@ -174,11 +172,9 @@ task validate_read_group { command <<< exit_code=0 if ~{restrictive}; then - if [[ ~{read_group.ID} =~ ^~{id_pattern}$ ]] \ - || [[ ~{read_group.ID} =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.ID}" =~ ~{restrictive_pattern} ]] then - >&2 echo "ID (~{read_group.ID}) must not match patterns:" - >&2 echo "'~{id_pattern}' or '~{restrictive_pattern}'" + >&2 echo "ID must not contain spaces" exit_code=1 fi fi @@ -194,11 +190,9 @@ task validate_read_group { fi if ~{defined(read_group.SM)}; then if ~{restrictive}; then - if [[ "~{read_group.SM}" =~ ^~{sample_pattern}$ ]] \ - || [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] + if [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] then - >&2 echo "SM must not match patterns:" - >&2 echo "'~{sample_pattern}' or '~{restrictive_pattern}'" + >&2 echo "SM must not contain spaces" exit_code=1 fi fi From 6c910ce58f812ce5e512ad4c61286d669aa539f3 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 08:47:21 -0500 Subject: [PATCH 15/47] test data_structures --- data_structures/test/flag_filter.yaml | 22 ++++--- data_structures/test/read_group.yaml | 90 +++++++++++++++++++++++++-- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index 4da14bcdd..fb6a55927 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -1,19 +1,23 @@ validate_string_is_12bit_int: - - name: decimal_passes + - name: valid_numbers inputs: number: - "5" - - name: hexadecimal_passes - inputs: - number: - "0x900" + - "01" + - "4095" assertions: stderr: - Input number \(.*\) is valid - - name: too_big_hexadecimal_fails + - name: invalid_numbers inputs: number: - "0x1000" + - "" + - "string" + - this is not a number + - "000000000011" + - "-1" assertions: exit_code: 42 stderr: @@ -38,9 +42,9 @@ validate_flag_filter: - name: invalid_FlagFilter_fails inputs: flags: - - include_if_all: "" - exclude_if_any: this is not a number - include_if_any: "000000000011" - exclude_if_all: "4095" + - include_if_all: "3" + exclude_if_any: "0xF04" + include_if_any: "03" + exclude_if_all: "" # empty string should trigger a fail assertions: should_fail: true diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a4367d8af..a5435be74 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,91 @@ -read_group_to_string: - - name: bad_id +validate_read_group: + - name: valid_read_groups inputs: read_group: - - ID: id, + - ID: id + SM: sample + - ID: R2 + SM: sampleA + LB: spaces are allowed in LB + BC: barcode with a space + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + - name: id_with_spaces + inputs: + read_group: + - ID: ids should not have spaces # this is a problem SM: sample_a LB: library + BC: barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - ID must not contain spaces + - name: sample_with_spaces + inputs: + read_group: + - ID: R123 + SM: samples should not have spaces + LB: library can have spaces though + BC: 
barcode + PU: platform_unit + PL: ILLUMINA + CN: center_name + DT: date + DS: description + PI: 1 + PG: program_group + PM: platform_model + FO: ACMG + KS: key_sequence + assertions: + exit_code: 1 + stderr: + - SM must not contain spaces + - name: spaces_allowed + inputs: + read_group: + - ID: technically permissible but gross + SM: same here! + restrictive: + - false + - name: missing_sample + inputs: + read_group: + - ID: R123 + LB: library assertions: exit_code: 1 - stdout: - - ID (.*) must not match pattern + stderr: + - SM is required + - name: missing_sample_allowed + inputs: + read_group: + - ID: R1 + LB: lib + required_fields: + - [] + +get_read_groups: + - name: works + inputs: + bam: + - test.bam From 64476ad2a7ed9412dd26ae52e1bde13ae8ba8bc5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 09:20:28 -0500 Subject: [PATCH 16/47] chore: log TODO --- data_structures/test/read_group.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index a5435be74..d255bb66b 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -89,3 +89,5 @@ get_read_groups: inputs: bam: - test.bam + +# TODO: test read_group_to_string From c76f2016c009db0999dd8b835c4471c635782a12 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:10:33 -0500 Subject: [PATCH 17/47] more tests --- tools/test/arriba.yaml | 42 +++++++++++++++++++++++++++++++++++++++ tools/test/bwa.yaml | 35 ++++++++++++++++++++++++++++++++ tools/test/deeptools.yaml | 7 +++++++ 3 files changed, 84 insertions(+) create mode 100644 tools/test/arriba.yaml create mode 100644 tools/test/bwa.yaml create mode 100644 tools/test/deeptools.yaml diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml new file mode 100644 index 000000000..b274dd692 --- /dev/null +++ b/tools/test/arriba.yaml @@ -0,0 +1,42 @@ +arriba: + - name: works + inputs: + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + gtf: + - gencode.v31.chr9_chr22.gtf.gz + reference_fasta_gz: + - GRCh38.chr9_chr22.fa.gz + disable_filters: + - [ blacklist ] + prefix: + - fusions +arriba_tsv_to_vcf: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + reference_fasta: + - GRCh38.chr9_chr22.fa.gz + prefix: + - fusions +arriba_extract_fusion_supporting_alignments: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai + prefix: + - fusions +arriba_annotate_exon_numbers: + - name: works + inputs: + fusions: + - fusions.BCR_ABL1.tsv + gtf: + - gencode.v31.chr9_chr22.gtf.gz + prefix: + - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml new file mode 100644 index 000000000..55004379e --- /dev/null +++ b/tools/test/bwa.yaml @@ -0,0 +1,35 @@ +bwa_aln: + - name: works + inputs: + fastq: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_aln_pe: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +bwa_mem: + - name: works + inputs: + read_one_fastq_gz: + - test_R1.fq.gz + read_group: + - "@RG\\tID:test\\tSM:test" + bwa_db_tar_gz: + - GRCh38.chrY_chrM.bwa_db.tar.gz +build_bwa_db: + - name: works + tags: [ reference, slow ] + inputs: + reference_fasta: + - GRCh38.chrY_chrM.fa \ No
newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml new file mode 100644 index 000000000..5b71a1a26 --- /dev/null +++ b/tools/test/deeptools.yaml @@ -0,0 +1,7 @@ +bam_coverage: + - name: works + inputs: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file From 2fb9d234124ff6cbd3536a83defeede9a6dccc29 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 28 Dec 2025 12:11:51 -0500 Subject: [PATCH 18/47] WIP: remove broken CI elements --- .github/workflows/build-and-test.yaml | 3 -- .github/workflows/pytest.yaml | 56 --------------------------- 2 files changed, 59 deletions(-) delete mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 6d042f359..04c5a79bc 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -7,6 +7,3 @@ jobs: call-build: uses: ./.github/workflows/docker-build.yaml secrets: inherit - call-test: - uses: ./.github/workflows/pytest.yaml - needs: call-build diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml deleted file mode 100644 index 64c0b310b..000000000 --- a/.github/workflows/pytest.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: pytest-workflow Check - -on: - workflow_call: - -jobs: - list-tags: - runs-on: ubuntu-latest - outputs: - tags: ${{ steps.set-tags.outputs.tags }} - steps: - - name: checkout - uses: actions/checkout@v4 - - name: set tags - id: set-tags - # remove the "reference" tag as it's redundant with other tags - run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \; | sort | uniq | grep -vE 'reference|slow' | jq -ncR '[inputs]')" >> $GITHUB_OUTPUT - pytest_check: - needs: list-tags - runs-on: ubuntu-latest - strategy: - matrix: - tag: ${{ fromJson(needs.list-tags.outputs.tags) }} - runner: [sprocket, miniwdl] - fail-fast: false - steps: - - uses: actions/checkout@v4 - with: - lfs: true - - name: Update Rust - if: matrix.runner == 'sprocket' - run: rustup update stable && rustup default stable - - name: Build Sprocket - if: matrix.runner == 'sprocket' - run: | - cargo install sprocket --locked - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install miniwdl and pytest-workflow - run: | - python -m pip install --upgrade pip - pip install -r requirements-ci.txt - - name: filter tests - # don't run slow tests in CI - run: | - find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("slow") ) )' {} \; - - name: Update containers - run: | - ./developer_scripts/update_container_tags.sh ${GITHUB_REF##*/} - - name: Run pytest-workflow - env: - RUNNER: ${{ matrix.runner }} - run: | - pytest --basetemp /home/runner/work/pytest --tag ${{ matrix.tag }} From d1e0d998d4b084a9de9002f147d3e6b219cbb08d Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:23:23 -0500 Subject: [PATCH 19/47] add test depth (more bams and fastqs tested) --- tools/test/arriba.yaml | 13 +++++++------ tools/test/bwa.yaml | 17 +++++++++++++---- tools/test/deeptools.yaml | 15 +++++++++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index b274dd692..e55e9e9d2 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -23,12 +23,13 @@ arriba_tsv_to_vcf: arriba_extract_fusion_supporting_alignments: - name: works inputs: - fusions: - - 
fusions.BCR_ABL1.tsv - bam: - - Aligned.sortedByCoord.chr9_chr22.bam - bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + $files: + fusions: + - fusions.BCR_ABL1.tsv + bam: + - Aligned.sortedByCoord.chr9_chr22.bam + bam_index: + - Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 55004379e..6dc3b3d9e 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -3,6 +3,9 @@ bwa_aln: inputs: fastq: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -10,10 +13,13 @@ bwa_aln: bwa_aln_pe: - name: works inputs: - read_one_fastq_gz: - - test_R1.fq.gz - read_two_fastq_gz: - - test_R2.fq.gz + $files: + read_one_fastq_gz: + - test_R1.fq.gz + - random10k.r1.fq.gz + read_two_fastq_gz: + - test_R2.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: @@ -23,6 +29,9 @@ bwa_mem: inputs: read_one_fastq_gz: - test_R1.fq.gz + - test_R2.fq.gz + - random10k.r1.fq.gz + - random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index 5b71a1a26..cba7fff3a 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -1,7 +1,14 @@ bam_coverage: - name: works inputs: - bam: - - test.bwa_aln_pe.chrY_chrM.bam - bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai \ No newline at end of file + $files: + bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam + - test.bam + bam_index: + - test.bwa_aln_pe.chrY_chrM.bam.bai + - Aligned.sortedByCoord.chr9_chr22.bam.bai + - test_rnaseq_variant.bam.bai + - test.bam.bai \ No newline at end of file From 51ad295b4907811c5461f3a537e6fc9cd0aac643 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:26:17 -0500 Subject: [PATCH 20/47] Update read_group.yaml --- data_structures/test/read_group.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d255bb66b..d91c53972 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,6 +88,9 @@ get_read_groups: - name: works inputs: bam: + - test.bwa_aln_pe.chrY_chrM.bam + - Aligned.sortedByCoord.chr9_chr22.bam + - test_rnaseq_variant.bam - test.bam # TODO: test read_group_to_string From 2b7d0ddedf9e9f1dae67eaaf5b98f15a78de4168 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 07:40:07 -0500 Subject: [PATCH 21/47] nesting for test fixtures --- data_structures/test/read_group.yaml | 8 ++--- .../Aligned.sortedByCoord.chr9_chr22.bam | 0 .../Aligned.sortedByCoord.chr9_chr22.bam.bai | 0 .../test.PE.2_RGs.Aligned.out.sorted.bam | 0 test/fixtures/{ => bams}/test.bam | 0 test/fixtures/{ => bams}/test.bam.bai | 0 .../{ => bams}/test.bwa_aln_pe.chrY_chrM.bam | 0 .../test.bwa_aln_pe.chrY_chrM.bam.bai | 0 test/fixtures/{ => bams}/test.extra_RG.bam | 0 .../{ => bams}/test.unaccounted_read.bam | 0 test/fixtures/{ => bams}/test2.bam | 0 .../{ => bams}/test_rnaseq_variant.bam | 0 .../{ => bams}/test_rnaseq_variant.bam.bai | 0 test/fixtures/{ => fastqs}/random10k.r1.fq.gz | 0 test/fixtures/{ => fastqs}/random10k.r2.fq.gz | 0 test/fixtures/{ => fastqs}/test_R1.fq.gz | 0 test/fixtures/{ => fastqs}/test_R2.fq.gz | 0 .../{ => reference}/GRCh38.chr1_chr19.dict | 0 .../{ => reference}/GRCh38.chr1_chr19.fa | 0 .../{ => 
reference}/GRCh38.chr1_chr19.fa.fai | 0 .../{ => reference}/GRCh38.chr9_chr22.fa.gz | 0 .../GRCh38.chrY_chrM.bwa_db.tar.gz | 0 .../{ => reference}/GRCh38.chrY_chrM.dict | 0 .../{ => reference}/GRCh38.chrY_chrM.fa | 0 .../{ => reference}/GRCh38.chrY_chrM.fa.fai | 0 .../gencode.v31.chr9_chr22.gtf.gz | 0 .../gencode.v31.chrY_chrM.gene.bed | 0 .../gencode.v31.chrY_chrM.genelengths.txt | 0 .../gencode.v31.chrY_chrM.gtf.gz | 0 .../kraken2_C_elegans_library.tar.gz | 0 .../{ => reference}/kraken2_db.mini.tar.gz | 0 .../{ => reference}/kraken2_taxonomy.tar.gz | 0 .../{ => reference}/star_db.chrY_chrM.tar.gz | 0 test/fixtures/{ => reference}/test.fa | 0 ...mo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...apiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test1.vcf.gz | 0 test/fixtures/{ => vcfs}/test1.vcf.gz.tbi | Bin test/fixtures/{ => vcfs}/test2.vcf.gz | 0 test/fixtures/{ => vcfs}/test2.vcf.gz.tbi | Bin tools/test/arriba.yaml | 14 ++++---- tools/test/bwa.yaml | 32 +++++++++--------- tools/test/deeptools.yaml | 16 ++++----- tools/test/picard.yaml | 2 +- tools/test/samtools.yaml | 12 +++---- 47 files changed, 42 insertions(+), 42 deletions(-) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam (100%) rename test/fixtures/{ => bams}/Aligned.sortedByCoord.chr9_chr22.bam.bai (100%) rename test/fixtures/{ => bams}/test.PE.2_RGs.Aligned.out.sorted.bam (100%) rename test/fixtures/{ => bams}/test.bam (100%) rename test/fixtures/{ => bams}/test.bam.bai (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam (100%) rename test/fixtures/{ => bams}/test.bwa_aln_pe.chrY_chrM.bam.bai (100%) rename test/fixtures/{ => bams}/test.extra_RG.bam (100%) rename test/fixtures/{ => bams}/test.unaccounted_read.bam (100%) rename test/fixtures/{ => bams}/test2.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam (100%) rename test/fixtures/{ => bams}/test_rnaseq_variant.bam.bai (100%) rename test/fixtures/{ => fastqs}/random10k.r1.fq.gz (100%) rename test/fixtures/{ => fastqs}/random10k.r2.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R1.fq.gz (100%) rename test/fixtures/{ => fastqs}/test_R2.fq.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chr1_chr19.fa.fai (100%) rename test/fixtures/{ => reference}/GRCh38.chr9_chr22.fa.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.bwa_db.tar.gz (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.dict (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa (100%) rename test/fixtures/{ => reference}/GRCh38.chrY_chrM.fa.fai (100%) rename test/fixtures/{ => reference}/gencode.v31.chr9_chr22.gtf.gz (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gene.bed (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.genelengths.txt (100%) rename test/fixtures/{ => reference}/gencode.v31.chrY_chrM.gtf.gz (100%) rename test/fixtures/{ => reference}/kraken2_C_elegans_library.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_db.mini.tar.gz (100%) rename test/fixtures/{ => reference}/kraken2_taxonomy.tar.gz (100%) rename test/fixtures/{ => reference}/star_db.chrY_chrM.tar.gz (100%) rename test/fixtures/{ => reference}/test.fa (100%) rename test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename 
test/fixtures/{ => vcfs}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{ => vcfs}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test1.vcf.gz.tbi (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz (100%) rename test/fixtures/{ => vcfs}/test2.vcf.gz.tbi (100%) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index d91c53972..b5257e9e0 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -88,9 +88,9 @@ get_read_groups: - name: works inputs: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam # TODO: test read_group_to_string diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam diff --git a/test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai b/test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai similarity index 100% rename from test/fixtures/Aligned.sortedByCoord.chr9_chr22.bam.bai rename to test/fixtures/bams/Aligned.sortedByCoord.chr9_chr22.bam.bai diff --git a/test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam b/test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam similarity index 100% rename from test/fixtures/test.PE.2_RGs.Aligned.out.sorted.bam rename to test/fixtures/bams/test.PE.2_RGs.Aligned.out.sorted.bam diff --git a/test/fixtures/test.bam b/test/fixtures/bams/test.bam similarity index 100% rename from test/fixtures/test.bam rename to test/fixtures/bams/test.bam diff --git a/test/fixtures/test.bam.bai b/test/fixtures/bams/test.bam.bai similarity index 100% rename from test/fixtures/test.bam.bai rename to test/fixtures/bams/test.bam.bai diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam diff --git a/test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai b/test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai similarity index 100% rename from test/fixtures/test.bwa_aln_pe.chrY_chrM.bam.bai rename to test/fixtures/bams/test.bwa_aln_pe.chrY_chrM.bam.bai diff --git a/test/fixtures/test.extra_RG.bam b/test/fixtures/bams/test.extra_RG.bam similarity index 100% rename from test/fixtures/test.extra_RG.bam rename to test/fixtures/bams/test.extra_RG.bam diff --git a/test/fixtures/test.unaccounted_read.bam b/test/fixtures/bams/test.unaccounted_read.bam similarity index 100% rename from test/fixtures/test.unaccounted_read.bam rename to test/fixtures/bams/test.unaccounted_read.bam diff --git a/test/fixtures/test2.bam b/test/fixtures/bams/test2.bam similarity index 100% rename from test/fixtures/test2.bam rename to test/fixtures/bams/test2.bam diff --git a/test/fixtures/test_rnaseq_variant.bam b/test/fixtures/bams/test_rnaseq_variant.bam similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam rename to test/fixtures/bams/test_rnaseq_variant.bam diff --git 
a/test/fixtures/test_rnaseq_variant.bam.bai b/test/fixtures/bams/test_rnaseq_variant.bam.bai similarity index 100% rename from test/fixtures/test_rnaseq_variant.bam.bai rename to test/fixtures/bams/test_rnaseq_variant.bam.bai diff --git a/test/fixtures/random10k.r1.fq.gz b/test/fixtures/fastqs/random10k.r1.fq.gz similarity index 100% rename from test/fixtures/random10k.r1.fq.gz rename to test/fixtures/fastqs/random10k.r1.fq.gz diff --git a/test/fixtures/random10k.r2.fq.gz b/test/fixtures/fastqs/random10k.r2.fq.gz similarity index 100% rename from test/fixtures/random10k.r2.fq.gz rename to test/fixtures/fastqs/random10k.r2.fq.gz diff --git a/test/fixtures/test_R1.fq.gz b/test/fixtures/fastqs/test_R1.fq.gz similarity index 100% rename from test/fixtures/test_R1.fq.gz rename to test/fixtures/fastqs/test_R1.fq.gz diff --git a/test/fixtures/test_R2.fq.gz b/test/fixtures/fastqs/test_R2.fq.gz similarity index 100% rename from test/fixtures/test_R2.fq.gz rename to test/fixtures/fastqs/test_R2.fq.gz diff --git a/test/fixtures/GRCh38.chr1_chr19.dict b/test/fixtures/reference/GRCh38.chr1_chr19.dict similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.dict rename to test/fixtures/reference/GRCh38.chr1_chr19.dict diff --git a/test/fixtures/GRCh38.chr1_chr19.fa b/test/fixtures/reference/GRCh38.chr1_chr19.fa similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa rename to test/fixtures/reference/GRCh38.chr1_chr19.fa diff --git a/test/fixtures/GRCh38.chr1_chr19.fa.fai b/test/fixtures/reference/GRCh38.chr1_chr19.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chr1_chr19.fa.fai rename to test/fixtures/reference/GRCh38.chr1_chr19.fa.fai diff --git a/test/fixtures/GRCh38.chr9_chr22.fa.gz b/test/fixtures/reference/GRCh38.chr9_chr22.fa.gz similarity index 100% rename from test/fixtures/GRCh38.chr9_chr22.fa.gz rename to test/fixtures/reference/GRCh38.chr9_chr22.fa.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz b/test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.bwa_db.tar.gz rename to test/fixtures/reference/GRCh38.chrY_chrM.bwa_db.tar.gz diff --git a/test/fixtures/GRCh38.chrY_chrM.dict b/test/fixtures/reference/GRCh38.chrY_chrM.dict similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.dict rename to test/fixtures/reference/GRCh38.chrY_chrM.dict diff --git a/test/fixtures/GRCh38.chrY_chrM.fa b/test/fixtures/reference/GRCh38.chrY_chrM.fa similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa rename to test/fixtures/reference/GRCh38.chrY_chrM.fa diff --git a/test/fixtures/GRCh38.chrY_chrM.fa.fai b/test/fixtures/reference/GRCh38.chrY_chrM.fa.fai similarity index 100% rename from test/fixtures/GRCh38.chrY_chrM.fa.fai rename to test/fixtures/reference/GRCh38.chrY_chrM.fa.fai diff --git a/test/fixtures/gencode.v31.chr9_chr22.gtf.gz b/test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chr9_chr22.gtf.gz rename to test/fixtures/reference/gencode.v31.chr9_chr22.gtf.gz diff --git a/test/fixtures/gencode.v31.chrY_chrM.gene.bed b/test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gene.bed rename to test/fixtures/reference/gencode.v31.chrY_chrM.gene.bed diff --git a/test/fixtures/gencode.v31.chrY_chrM.genelengths.txt b/test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt similarity index 100% rename from 
test/fixtures/gencode.v31.chrY_chrM.genelengths.txt rename to test/fixtures/reference/gencode.v31.chrY_chrM.genelengths.txt diff --git a/test/fixtures/gencode.v31.chrY_chrM.gtf.gz b/test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz similarity index 100% rename from test/fixtures/gencode.v31.chrY_chrM.gtf.gz rename to test/fixtures/reference/gencode.v31.chrY_chrM.gtf.gz diff --git a/test/fixtures/kraken2_C_elegans_library.tar.gz b/test/fixtures/reference/kraken2_C_elegans_library.tar.gz similarity index 100% rename from test/fixtures/kraken2_C_elegans_library.tar.gz rename to test/fixtures/reference/kraken2_C_elegans_library.tar.gz diff --git a/test/fixtures/kraken2_db.mini.tar.gz b/test/fixtures/reference/kraken2_db.mini.tar.gz similarity index 100% rename from test/fixtures/kraken2_db.mini.tar.gz rename to test/fixtures/reference/kraken2_db.mini.tar.gz diff --git a/test/fixtures/kraken2_taxonomy.tar.gz b/test/fixtures/reference/kraken2_taxonomy.tar.gz similarity index 100% rename from test/fixtures/kraken2_taxonomy.tar.gz rename to test/fixtures/reference/kraken2_taxonomy.tar.gz diff --git a/test/fixtures/star_db.chrY_chrM.tar.gz b/test/fixtures/reference/star_db.chrY_chrM.tar.gz similarity index 100% rename from test/fixtures/star_db.chrY_chrM.tar.gz rename to test/fixtures/reference/star_db.chrY_chrM.tar.gz diff --git a/test/fixtures/test.fa b/test/fixtures/reference/test.fa similarity index 100% rename from test/fixtures/test.fa rename to test/fixtures/reference/test.fa diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi diff --git a/test/fixtures/test1.vcf.gz b/test/fixtures/vcfs/test1.vcf.gz similarity index 100% rename from test/fixtures/test1.vcf.gz rename to test/fixtures/vcfs/test1.vcf.gz diff --git a/test/fixtures/test1.vcf.gz.tbi b/test/fixtures/vcfs/test1.vcf.gz.tbi similarity index 100% rename from test/fixtures/test1.vcf.gz.tbi rename to test/fixtures/vcfs/test1.vcf.gz.tbi diff --git a/test/fixtures/test2.vcf.gz b/test/fixtures/vcfs/test2.vcf.gz similarity index 100% rename from test/fixtures/test2.vcf.gz rename to test/fixtures/vcfs/test2.vcf.gz diff --git a/test/fixtures/test2.vcf.gz.tbi b/test/fixtures/vcfs/test2.vcf.gz.tbi similarity index 100% rename from test/fixtures/test2.vcf.gz.tbi rename to test/fixtures/vcfs/test2.vcf.gz.tbi diff --git 
a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e55e9e9d2..e96b4d8ce 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -2,11 +2,11 @@ arriba: - name: works inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz reference_fasta_gz: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz disable_filters: - [ blacklist ] prefix: @@ -17,7 +17,7 @@ arriba_tsv_to_vcf: fusions: - fusions.BCR_ABL1.tsv reference_fasta: - - GRCh38.chr9_chr22.fa.gz + - reference/GRCh38.chr9_chr22.fa.gz prefix: - fusions arriba_extract_fusion_supporting_alignments: @@ -27,9 +27,9 @@ arriba_extract_fusion_supporting_alignments: fusions: - fusions.BCR_ABL1.tsv bam: - - Aligned.sortedByCoord.chr9_chr22.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam bam_index: - - Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai prefix: - fusions arriba_annotate_exon_numbers: @@ -38,6 +38,6 @@ arriba_annotate_exon_numbers: fusions: - fusions.BCR_ABL1.tsv gtf: - - gencode.v31.chr9_chr22.gtf.gz + - reference/gencode.v31.chr9_chr22.gtf.gz prefix: - fusions \ No newline at end of file diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 6dc3b3d9e..bc05f3524 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -2,43 +2,43 @@ bwa_aln: - name: works inputs: fastq: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_aln_pe: - name: works inputs: $files: read_one_fastq_gz: - - test_R1.fq.gz - - random10k.r1.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz read_two_fastq_gz: - - test_R2.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz bwa_mem: - name: works inputs: read_one_fastq_gz: - - test_R1.fq.gz - - test_R2.fq.gz - - random10k.r1.fq.gz - - random10k.r2.fq.gz + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: - - GRCh38.chrY_chrM.bwa_db.tar.gz + - reference/GRCh38.chrY_chrM.bwa_db.tar.gz build_bwa_db: - name: works tags: [ reference, slow ] inputs: reference_fasta: - - GRCh38.chrY_chrM.fa \ No newline at end of file + - reference/GRCh38.chrY_chrM.fa \ No newline at end of file diff --git a/tools/test/deeptools.yaml b/tools/test/deeptools.yaml index cba7fff3a..e44caf285 100644 --- a/tools/test/deeptools.yaml +++ b/tools/test/deeptools.yaml @@ -3,12 +3,12 @@ bam_coverage: inputs: $files: bam: - - test.bwa_aln_pe.chrY_chrM.bam - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam bam_index: - - test.bwa_aln_pe.chrY_chrM.bam.bai - - Aligned.sortedByCoord.chr9_chr22.bam.bai - - test_rnaseq_variant.bam.bai - - test.bam.bai \ No newline at end of file + - bams/test.bwa_aln_pe.chrY_chrM.bam.bai + - bams/Aligned.sortedByCoord.chr9_chr22.bam.bai + - bams/test_rnaseq_variant.bam.bai + - bams/test.bam.bai \ 
No newline at end of file diff --git a/tools/test/picard.yaml b/tools/test/picard.yaml index 7d031e5dd..1c9f80721 100644 --- a/tools/test/picard.yaml +++ b/tools/test/picard.yaml @@ -2,7 +2,7 @@ merge_sam_files: - name: Merge works inputs: bams: - - [test.bwa_aln_pe.chrY_chrM.bam, test.PE.2_RGs.Aligned.out.sorted.bam] + - [bams/test.bwa_aln_pe.chrY_chrM.bam, bams/test.PE.2_RGs.Aligned.out.sorted.bam] prefix: - test.merged assertions: diff --git a/tools/test/samtools.yaml b/tools/test/samtools.yaml index feca44f2d..52a5de1b1 100644 --- a/tools/test/samtools.yaml +++ b/tools/test/samtools.yaml @@ -2,9 +2,9 @@ bam_to_fastq: - name: kitchen_sink inputs: bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam bitwise_filter: - include_if_all: "0x0" exclude_if_any: "0x900" @@ -34,6 +34,6 @@ bam_to_fastq: - true - false bam: - - Aligned.sortedByCoord.chr9_chr22.bam - - test_rnaseq_variant.bam - - test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bwa_aln_pe.chrY_chrM.bam From b3b9c2702a1f3b3b470cc078ac931f3b599d0236 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:29 -0500 Subject: [PATCH 22/47] deprecate fastqc --- tools/fastqc.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index a79de80b8..cf6fe238c 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -5,6 +5,8 @@ version 1.1 task fastqc { meta { description: "Generates a FastQC quality control metrics report for the input BAM file" + warning: "**[DEPRECATED]** We prefer the analysis provided by `fastp` which computes similar metrics but is faster and more robust. Please see the `fastp` task in `fastp.wdl` instead of using FastQC!" + deprecated: true outputs: { raw_data: "A zip archive of raw FastQC data. 
Can be parsed by MultiQC.", results: "A gzipped tar archive of all FastQC output files", From 038909d57b539665a829f7c97756886483b31bde Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:27:38 -0500 Subject: [PATCH 23/47] log TODO --- tools/test/arriba.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/test/arriba.yaml b/tools/test/arriba.yaml index e96b4d8ce..8f51f6456 100644 --- a/tools/test/arriba.yaml +++ b/tools/test/arriba.yaml @@ -1,3 +1,4 @@ +# TODO: test advanced options arriba: - name: works inputs: From 77271b361496d1cf2bc137e8910acd17ec902432 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:28:04 -0500 Subject: [PATCH 24/47] tests for fastp and fq --- tools/test/fastp.yaml | 11 +++++++++ tools/test/fq.yaml | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/test/fastp.yaml create mode 100644 tools/test/fq.yaml diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml new file mode 100644 index 000000000..851d0ce33 --- /dev/null +++ b/tools/test/fastp.yaml @@ -0,0 +1,11 @@ +# TODO: test advanced options +fastp: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml new file mode 100644 index 000000000..9f5fc6095 --- /dev/null +++ b/tools/test/fq.yaml @@ -0,0 +1,56 @@ +# TODO: add lint tests for malformed fastqs +fqlint: + - name: valid_fastqs + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz +subsample: + - name: works + inputs: + $files: + read_one_fastq: + - fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + $sampling_controls: + record_count: + - 1000 + - 500 + - -1 # negative should disable + - 0 # so should zero + probability: + - 0.0 # 0 should disable + - 1.0 # 1 should also disable + - 0.5 + - 0.25 + - name: conflicting_args + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + record_count: + - -1 + - 0 + probability: + - 0.0 + - 1.0 + assertions: + exit_code: 2 + - name: neither_count_nor_probability_specified + inputs: + $files: + read_one_fastq: + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/random10k.r2.fq.gz + assertions: + exit_code: 2 From 5ed086613d0dd5eae4687c61fb32a8fc6843b420 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 10:58:11 -0500 Subject: [PATCH 25/47] mv reference vcfs to right dir --- .../Homo_sapiens_assembly38.dbsnp138.top5000.vcf | 0 ...Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx | Bin ...Mills_and_1000G_gold_standard.indels.hg38.vcf.gz | 0 ...s_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi | Bin 4 files changed, 0 insertions(+), 0 deletions(-) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf (100%) rename test/fixtures/{vcfs => reference}/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz (100%) rename test/fixtures/{vcfs => reference}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi (100%) diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf 
b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf diff --git a/test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx b/test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx similarity index 100% rename from test/fixtures/vcfs/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx rename to test/fixtures/reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz diff --git a/test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi b/test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi similarity index 100% rename from test/fixtures/vcfs/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi rename to test/fixtures/reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi From 8a186b64a95f38074dd51b518aefcee1e27b70bc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 11:34:13 -0500 Subject: [PATCH 26/47] Create gatk4.yaml --- tools/test/gatk4.yaml | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tools/test/gatk4.yaml diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml new file mode 100644 index 000000000..73bd4fe9d --- /dev/null +++ b/tools/test/gatk4.yaml @@ -0,0 +1,100 @@ +apply_bqsr: + - name: works + inputs: + $files: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + recalibration_report: + - test_rnaseq_variant.recal.txt +base_recalibrator: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + $known_indels: + known_indels_sites_vcfs: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz ] + known_indels_sites_indices: + - [ reference/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi ] +haplotype_caller: + - name: works + tags: [ slow ] + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + $dbsnp: + dbSNP_vcf: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf + dbSNP_vcf_index: + - reference/Homo_sapiens_assembly38.dbsnp138.top5000.vcf.idx + interval_list: + - chr1_chr19.interval_list +split_n_cigar_reads: + - name: works + inputs: + $sample: + bam: + - bams/test_rnaseq_variant.bam + bam_index: + - bams/test_rnaseq_variant.bam.bai + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict + prefix: + - split +variant_filtration: + - name: works + inputs: + 
$sample: + vcf: + - vcfs/test1.vcf.gz + - vcfs/test2.vcf.gz + vcf_index: + - vcfs/test1.vcf.gz.tbi + - vcfs/test2.vcf.gz.tbi + $reference: + fasta: + - reference/GRCh38.chr1_chr19.fa + fasta_index: + - reference/GRCh38.chr1_chr19.fa.fai + dict: + - reference/GRCh38.chr1_chr19.dict +mark_duplicates_spark: + - name: works + inputs: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + - bams/Aligned.sortedByCoord.chr9_chr22.bam + - bams/test_rnaseq_variant.bam + - bams/test.bam \ No newline at end of file From 52da94cb055553ea3e6f0e821a682e6ddcba75c5 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Mon, 29 Dec 2025 14:31:25 -0500 Subject: [PATCH 27/47] WIP --- .gitignore | 1 + tools/htseq.wdl | 15 +++------------ tools/test/gatk4.yaml | 1 + tools/test/htseq.yaml | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 12 deletions(-) create mode 100644 tools/test/htseq.yaml diff --git a/.gitignore b/.gitignore index c1ff16be3..2bab65b34 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +pytest/ # Ignore common bioinformatics formats used in these workflows. # But only if they are in the root of this repo /*.fastq.gz diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 95d604e02..bcaca9ce0 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -34,15 +34,6 @@ task count { description: "GFF attribute to be used as feature ID", group: "Common", } - mode: { - description: "Mode to handle reads overlapping more than one feature. `union` is recommended for most use-cases.", - external_help: "https://htseq.readthedocs.io/en/latest/htseqcount.html#htseq-count-counting-reads-within-features", - choices: [ - "union", - "intersection-strict", - "intersection-nonempty", - ], - } include_custom_header: { description: "Include a custom header for the output file? If true, the first line of the output file will be `~{idattr}\t~{prefix}`.", warning: "This is not an official feature of HTSeq. 
This may break downstream tools that expect the typical headerless HTSeq output format.", @@ -80,7 +71,6 @@ task count { String prefix = basename(bam, ".bam") String feature_type = "exon" String idattr = "gene_name" - String mode = "union" Boolean include_custom_header = true Boolean pos_sorted = false Boolean nonunique = false @@ -93,6 +83,9 @@ task count { String outfile_name = prefix + ".feature-counts.txt" + # the docs recommend this for most use cases, so we hardcode it + String mode = "union" + Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") @@ -107,8 +100,6 @@ if ~{include_custom_header}; then echo -e "~{idattr}\t~{prefix}" > "~{outfile_name}" - else - true > "~{outfile_name}" # ensure file is empty fi # 9223372036854776000 == max 64 bit Float diff --git a/tools/test/gatk4.yaml b/tools/test/gatk4.yaml index 73bd4fe9d..bfb256ae6 100644 --- a/tools/test/gatk4.yaml +++ b/tools/test/gatk4.yaml @@ -1,3 +1,4 @@ +# TODO: advanced options apply_bqsr: - name: works inputs: diff --git a/tools/test/htseq.yaml b/tools/test/htseq.yaml new file mode 100644 index 000000000..1a2356883 --- /dev/null +++ b/tools/test/htseq.yaml @@ -0,0 +1,37 @@ +count: + - name: kitchen_sink + tags: [ slow ] + inputs: + $files: + bam: + - bams/test.bwa_aln_pe.chrY_chrM.bam + gtf: + - reference/gencode.v31.chrY_chrM.gtf.gz + pos_sorted: + - false + strandedness: + - "yes" + - "no" + - reverse + include_custom_header: + - true + - false + nonunique: + - true + - false + secondary_alignments: + - true + - false + supplementary_alignments: + - true + - false + minaqual: + - 0 + - 10 +calc_tpm: + - name: works + inputs: + counts: + - test.bwa_aln_pe.chrY_chrM.feature-counts.txt + feature_lengths: + - reference/gencode.v31.chrY_chrM.genelengths.txt \ No newline at end of file From 8658e212da835e8437c332eb1a3e2f987152bec6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sat, 3 Jan 2026 11:28:20 -0500 Subject: [PATCH 28/47] Update flag_filter.yaml --- data_structures/test/flag_filter.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/data_structures/test/flag_filter.yaml b/data_structures/test/flag_filter.yaml index fb6a55927..cd64d0cca 100644 --- a/data_structures/test/flag_filter.yaml +++ b/data_structures/test/flag_filter.yaml @@ -6,6 +6,8 @@ validate_string_is_12bit_int: - "0x900" - "01" - "4095" + - "0" + - "072" assertions: stderr: - Input number \(.*\) is valid @@ -14,10 +16,11 @@ validate_string_is_12bit_int: number: - "0x1000" - "" - - "string" + - string - this is not a number - "000000000011" - "-1" + - "08" assertions: exit_code: 42 stderr: @@ -26,6 +29,7 @@ validate_string_is_12bit_int: inputs: number: - "4096" + - "9999" assertions: exit_code: 42 stderr: From 275865919980ef24c91b8b7239e5de66dc29df Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Sun, 4 Jan 2026 11:41:04 -0500 Subject: [PATCH 29/47] fix: SE works in addition to PE --- tools/fq.wdl | 2 +- tools/test/bwa.yaml | 16 +++++++++++----- tools/test/fastp.yaml | 6 +++++- tools/test/fq.yaml | 8 ++++++++ 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/fq.wdl b/tools/fq.wdl index 55148a99d..e5eab4763 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -154,7 +154,7 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{"--r2-dst '" + r2_dst + "'"} \ + ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/test/bwa.yaml b/tools/test/bwa.yaml index 
bc05f3524..f5e1d7c15 100644 --- a/tools/test/bwa.yaml +++ b/tools/test/bwa.yaml @@ -27,11 +27,17 @@ bwa_aln_pe: bwa_mem: - name: works inputs: - read_one_fastq_gz: - - fastqs/test_R1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/random10k.r2.fq.gz + $samples: + read_one_fastq_gz: + - fastqs/test_R1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r1.fq.gz + - fastqs/random10k.r2.fq.gz + read_two_fastq_gz: + - fastqs/test_R2.fq.gz + - null + - fastqs/random10k.r2.fq.gz + - null read_group: - "@RG\\tID:test\\tSM:test" bwa_db_tar_gz: diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 851d0ce33..272624876 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -6,6 +6,10 @@ fastp: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz \ No newline at end of file + - fastqs/random10k.r2.fq.gz + - null + - null \ No newline at end of file diff --git a/tools/test/fq.yaml b/tools/test/fq.yaml index 9f5fc6095..c8c48084a 100644 --- a/tools/test/fq.yaml +++ b/tools/test/fq.yaml @@ -6,9 +6,13 @@ fqlint: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null subsample: - name: works inputs: @@ -16,9 +20,13 @@ subsample: read_one_fastq: - fastqs/test_R1.fq.gz - fastqs/random10k.r1.fq.gz + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz read_two_fastq: - fastqs/test_R2.fq.gz - fastqs/random10k.r2.fq.gz + - null + - null $sampling_controls: record_count: - 1000 From 108983536cfe8e6ab678eb226516fd78b0388dd6 Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Tue, 27 Jan 2026 11:18:46 -0500 Subject: [PATCH 30/47] chore: don't specify defaults in two (disagreeing) places --- data_structures/read_group.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 719fdb409..e0e75d715 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -83,7 +83,7 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = [] + Array[String] required_fields = ["SM"] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -156,8 +156,8 @@ task validate_read_group { input { ReadGroup read_group - Array[String] required_fields = ["SM"] - Boolean restrictive = true + Array[String] required_fields + Boolean restrictive } # The SAM spec allows any printable ASCII character in header fields. 
@@ -374,7 +374,7 @@ task inner_read_group_to_string { input { ReadGroup read_group - Boolean format_as_sam_record = false + Boolean format_as_sam_record } String delimiter = if format_as_sam_record then "\\t" else " " From 5b690422c77bad542d0ab60b7ac7ca5e550e1bdb Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Wed, 28 Jan 2026 12:46:35 -0500 Subject: [PATCH 31/47] add a few (experimental) output assertions --- data_structures/test/read_group.yaml | 29 +++++----- tools/test/fastp.yaml | 79 +++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 29 deletions(-) diff --git a/data_structures/test/read_group.yaml b/data_structures/test/read_group.yaml index b5257e9e0..12ef8e5ad 100644 --- a/data_structures/test/read_group.yaml +++ b/data_structures/test/read_group.yaml @@ -1,11 +1,11 @@ -validate_read_group: +read_group_to_string: - name: valid_read_groups inputs: read_group: - - ID: id - SM: sample - - ID: R2 - SM: sampleA + - ID: R1 + SM: sampleFoo + - ID: R1 + SM: sampleFoo LB: spaces are allowed in LB BC: barcode with a space PU: platform_unit @@ -18,6 +18,11 @@ validate_read_group: PM: platform_model FO: ACMG KS: key_sequence + assertions: + outputs: + validated_read_group: + - Contains: R1 + - Contains: sampleFoo - name: id_with_spaces inputs: read_group: @@ -36,9 +41,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - ID must not contain spaces + should_fail: true - name: sample_with_spaces inputs: read_group: @@ -57,9 +60,7 @@ validate_read_group: FO: ACMG KS: key_sequence assertions: - exit_code: 1 - stderr: - - SM must not contain spaces + should_fail: true - name: spaces_allowed inputs: read_group: @@ -73,9 +74,7 @@ validate_read_group: - ID: R123 LB: library assertions: - exit_code: 1 - stderr: - - SM is required + should_fail: true - name: missing_sample_allowed inputs: read_group: @@ -92,5 +91,3 @@ get_read_groups: - bams/Aligned.sortedByCoord.chr9_chr22.bam - bams/test_rnaseq_variant.bam - bams/test.bam - -# TODO: test read_group_to_string diff --git a/tools/test/fastp.yaml b/tools/test/fastp.yaml index 272624876..56429b143 100644 --- a/tools/test/fastp.yaml +++ b/tools/test/fastp.yaml @@ -1,15 +1,68 @@ # TODO: test advanced options fastp: - - name: works - inputs: - $files: - read_one_fastq: - - fastqs/test_R1.fq.gz - - fastqs/random10k.r1.fq.gz - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - read_two_fastq: - - fastqs/test_R2.fq.gz - - fastqs/random10k.r2.fq.gz - - null - - null \ No newline at end of file + - name: SE_trimming + inputs: + read_one_fastq: + - 
fastqs/test_R1.fq.gz + - fastqs/random10k.r1.fq.gz + read_two_fastq: + - fastqs/test_R2.fq.gz + - fastqs/random10k.r2.fq.gz + output_fastq: + - false + assertions: + outputs: + single_end_reads_fastq_gz: + - Defined: false + read_one_fastq_gz: + - Defined: false + read_two_fastq_gz: + - Defined: false \ No newline at end of file From b1f9d8fc2e58ff87263466b74dfed3487e2aafcc Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Thu, 19 Mar 2026 14:27:24 -0400 Subject: [PATCH 32/47] sprocket format --- data_structures/flag_filter.wdl | 9 +- data_structures/read_group.wdl | 35 ++- tools/arriba.wdl | 75 +++++-- tools/bwa.wdl | 59 ++--- tools/deeptools.wdl | 3 +- tools/fastp.wdl | 79 ++++--- tools/fastqc.wdl | 1 - tools/fq.wdl | 26 ++- tools/gatk4.wdl | 106 +++++---- tools/htseq.wdl | 38 +++- tools/kraken2.wdl | 91 ++++---- tools/librarian.wdl | 9 +- tools/md5sum.wdl | 3 +- tools/mosdepth.wdl | 6 +- tools/ngsderive.wdl | 33 ++- tools/picard.wdl | 51 +++-- tools/qualimap.wdl | 23 +- tools/sambamba.wdl | 21 +- tools/samtools.wdl | 210 ++++++++++++------ tools/star.wdl | 64 +++--- tools/util.wdl | 34 +-- workflows/chipseq/chipseq-standard.wdl | 53 +++-- workflows/dnaseq/dnaseq-core.wdl | 47 ++-- workflows/dnaseq/dnaseq-standard-fastq.wdl | 15 +- workflows/dnaseq/dnaseq-standard.wdl | 13 +- workflows/general/alignment-post.wdl | 32 ++- workflows/general/bam-to-fastqs.wdl | 22 +- workflows/general/samtools-merge.wdl | 27 +-- workflows/methylation/methylation-cohort.wdl | 81 ++++--- .../methylation/methylation-preprocess.wdl | 6 +- .../methylation/methylation-standard.wdl | 8 +- workflows/qc/markdups-post.wdl | 6 +- workflows/qc/quality-check-standard.wdl | 163 +++++++++----- workflows/reference/bwa-db-build.wdl | 4 +- workflows/reference/gatk-reference.wdl | 20 +- workflows/reference/qc-reference.wdl | 25 ++- workflows/reference/star-db-build.wdl | 6 +- workflows/rnaseq/rnaseq-core.wdl | 36 ++- workflows/rnaseq/rnaseq-standard-fastq.wdl | 16 +- workflows/rnaseq/rnaseq-standard.wdl | 5 +- workflows/rnaseq/rnaseq-variant-calling.wdl | 12 +- 41 files changed, 934 insertions(+), 639 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 27d81a0be..263b7f456 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -58,7 +58,6 @@ ## In short, those are all flags corresponding to the quality of the read ## and them being `true` may indicate that the read is of low quality and ## should be excluded. 
- version 1.1 struct FlagFilter { @@ -127,15 +126,15 @@ workflow validate_flag_filter { } call validate_string_is_12bit_int as validate_include_if_any { input: - number = flags.include_if_any + number = flags.include_if_any, } call validate_string_is_12bit_int as validate_include_if_all { input: - number = flags.include_if_all + number = flags.include_if_all, } call validate_string_is_12bit_int as validate_exclude_if_any { input: - number = flags.exclude_if_any + number = flags.exclude_if_any, } call validate_string_is_12bit_int as validate_exclude_if_all { input: - number = flags.exclude_if_all + number = flags.exclude_if_all, } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index e0e75d715..df81982d6 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -40,7 +40,6 @@ ## } ## } ## ``` - version 1.1 #@ except: SnakeCase @@ -83,7 +82,9 @@ workflow read_group_to_string { input { ReadGroup read_group - Array[String] required_fields = ["SM"] + Array[String] required_fields = [ + "SM", + ] Boolean format_as_sam_record = false Boolean restrictive = true } @@ -99,8 +100,7 @@ workflow read_group_to_string { } output { - String validated_read_group - = inner_read_group_to_string.stringified_read_group + String validated_read_group = inner_read_group_to_string.stringified_read_group } } @@ -109,7 +109,7 @@ task get_read_groups { description: "Gets read group information from a BAM file and writes it out as JSON which is converted to a WDL struct." warning: "This task will uppercase any lowercase `PL` values it finds, as is required by the [SAM specification](https://samtools.github.io/hts-specs/SAMv1.pdf)." outputs: { - read_groups: "An array of `ReadGroup` structs containing read group information." + read_groups: "An array of `ReadGroup` structs containing read group information.", } } @@ -165,8 +165,18 @@ task validate_read_group { # We have the opinion that is too permissive for ID and SM. String restrictive_pattern = "\\ " # Disallow spaces Array[String] platforms = [ - "CAPILLARY", "DNBSEQ", "ELEMENT", "HELICOS", "ILLUMINA", "IONTORRENT", "LS454", - "ONT", "PACBIO", "SINGULAR", "SOLID", "ULTIMA", + "CAPILLARY", + "DNBSEQ", + "ELEMENT", + "HELICOS", + "ILLUMINA", + "IONTORRENT", + "LS454", + "ONT", + "PACBIO", + "SINGULAR", + "SOLID", + "ULTIMA", ] command <<< @@ -262,7 +272,10 @@ task validate_read_group { fi fi if [ "$(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS")" -eq 1 ]; then - if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ]; then + if [ -z "~{if defined(read_group.KS) + then read_group.KS + else "" + }" ]; then >&2 echo "KS is required" exit_code=1 fi @@ -360,7 +373,7 @@ task inner_read_group_to_string { description: "Converts a `ReadGroup` struct to a `String` **without any validation**." warning: "Please use the `read_group_to_string` workflow, which has validation of the `ReadGroup` contents." 
outputs: { - stringified_read_group: "Input `ReadGroup` as a string" + stringified_read_group: "Input `ReadGroup` as a string", } } @@ -377,7 +390,9 @@ task inner_read_group_to_string { Boolean format_as_sam_record } - String delimiter = if format_as_sam_record then "\\t" else " " + String delimiter = if format_as_sam_record + then "\\t" + else " " command <<< if ~{format_as_sam_record}; then diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 8ea5c8e3c..84da082c0 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://arriba.readthedocs.io/en/latest/) - version 1.1 task arriba { @@ -138,14 +137,40 @@ task arriba { File? protein_domains File? wgs_svs Array[String] interesting_contigs = [ - "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", - "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "AC_*", "NC_*", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "X", + "Y", + "AC_*", + "NC_*", + ] + Array[String] viral_contigs = [ + "AC_*", + "NC_*", ] - Array[String] viral_contigs = ["AC_*", "NC_*"] Array[String] disable_filters = [] #@ except: LineWidth - String feature_name - = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" + String feature_name = "gene_name=gene_name|gene_id,gene_id=gene_id,transcript_id=transcript_id,feature_exon=exon,feature_CDS=CDS" String prefix = basename(bam, ".bam") + ".fusions" String strandedness = "auto" Boolean mark_duplicates = true @@ -176,10 +201,8 @@ task arriba { } Int bam_size_gb = ceil(size(bam, "GiB")) - Int disk_size_gb = bam_size_gb - + ceil(size(gtf, "GiB")) - + ceil(size(reference_fasta_gz, "GiB")) - + modify_disk_size_gb + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) + ceil(size(reference_fasta_gz, + "GiB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< @@ -198,18 +221,15 @@ task arriba { ~{"-d '" + wgs_svs + "'"} \ -D ~{max_genomic_breakpoint_distance} \ -s "~{strandedness}" \ - ~{( - if length(interesting_contigs) > 0 + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", quote(interesting_contigs)) else "" )} \ - ~{( - if length(viral_contigs) > 0 + ~{(if length(viral_contigs) > 0 then "-v " + sep(",", quote(viral_contigs)) else "" )} \ - ~{( - if length(disable_filters) > 0 + ~{(if length(disable_filters) > 0 then "-f " + sep(",", quote(disable_filters)) else "" )} \ @@ -232,9 +252,18 @@ task arriba { -l ~{max_itd_length} \ -z ~{min_itd_allele_fraction} \ -Z ~{min_itd_supporting_reads} \ - ~{if mark_duplicates then "" else "-u"} \ - ~{if report_additional_columns then "-X" else ""} \ - ~{if fill_gaps then "-I" else ""} + ~{if mark_duplicates + then "" + else "-u" + } \ + ~{if report_additional_columns + then "-X" + else "" + } \ + ~{if fill_gaps + then "-I" + else "" + } >>> output { @@ -255,7 +284,7 @@ task arriba_tsv_to_vcf { meta { description: "Convert Arriba TSV format fusions to VCF format." 
outputs: { - fusions_vcf: "Output file of fusions in VCF format" + fusions_vcf: "Output file of fusions in VCF format", } } @@ -274,9 +303,7 @@ task arriba_tsv_to_vcf { } Int input_size_gb = ceil(size(fusions, "GiB")) - Int disk_size_gb = ceil(input_size_gb) - + (ceil(size(reference_fasta, "GiB")) * 3) - + modify_disk_size_gb + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GiB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -356,7 +383,7 @@ task arriba_annotate_exon_numbers { meta { description: "Annotate fusions with exon numbers." outputs: { - fusion_tsv: "TSV file with fusions annotated with exon numbers" + fusion_tsv: "TSV file with fusions annotated with exon numbers", } } diff --git a/tools/bwa.wdl b/tools/bwa.wdl index dbba3f2e7..3a7b3cb0d 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/lh3/bwa) - version 1.1 task bwa_aln { meta { description: "Maps Single-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -36,9 +35,7 @@ task bwa_aln { File fastq File bwa_db_tar_gz String read_group - String prefix = sub( - basename(fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean use_all_cores = false @@ -50,8 +47,7 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GiB") Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -98,7 +94,7 @@ task bwa_aln_pe { meta { description: "Maps Paired-End FASTQ files to BAM format using bwa aln" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -132,11 +128,8 @@ task bwa_aln_pe { File read_two_fastq_gz File bwa_db_tar_gz String read_group - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -144,12 +137,10 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = ( - size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB") - ) + Float input_fastq_size = (size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + )) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 5 + modify_disk_size_gb ) command <<< @@ -202,7 +193,7 @@ task bwa_mem { meta { description: "Maps FASTQ files to BAM format using bwa mem" outputs: { - bam: "Aligned BAM format file" + bam: "Aligned BAM format file", } } @@ -230,11 +221,8 @@ task bwa_mem { File bwa_db_tar_gz String read_group File? 
read_two_fastq_gz - String prefix = sub( - basename(read_one_fastq_gz), - "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ) + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + "") Boolean use_all_cores = false Int ncpu = 4 Int modify_disk_size_gb = 0 @@ -242,11 +230,10 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GiB") - + size(read_two_fastq_gz, "GiB") + Float input_fastq_size = size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB" + ) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = ( - ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb ) command <<< @@ -271,9 +258,10 @@ task bwa_mem { -R "~{read_group}" \ bwa_db/"$PREFIX" \ "~{basename(read_one_fastq_gz)}" \ - ~{( - if defined(read_two_fastq_gz) - then "'" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "'" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ @@ -281,9 +269,10 @@ task bwa_mem { rm -r bwa_db rm "~{basename(read_one_fastq_gz)}" - ~{( - if defined(read_two_fastq_gz) - then "rm '" + basename(select_first([read_two_fastq_gz])) + "'" + ~{(if defined(read_two_fastq_gz) + then "rm '" + basename(select_first([ + read_two_fastq_gz, + ])) + "'" else "" )} >>> @@ -305,7 +294,7 @@ task build_bwa_db { meta { description: "Creates a BWA index and returns it as a compressed tar archive" outputs: { - bwa_db_tar_gz: "Tarballed bwa reference files" + bwa_db_tar_gz: "Tarballed bwa reference files", } } diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl index 91b5e1b06..d30738abc 100755 --- a/tools/deeptools.wdl +++ b/tools/deeptools.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://deeptools.readthedocs.io/en/develop/index.html) - version 1.1 task bam_coverage { meta { description: "Generates a BigWig coverage track using bamCoverage from DeepTools" outputs: { - bigwig: "BigWig format coverage file" + bigwig: "BigWig format coverage file", } } diff --git a/tools/fastp.wdl b/tools/fastp.wdl index 42b78e34e..64d1ea545 100644 --- a/tools/fastp.wdl +++ b/tools/fastp.wdl @@ -95,9 +95,7 @@ task fastp { input { File read_one_fastq File? 
read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".trimmed" Boolean output_fastq = true @@ -114,7 +112,9 @@ task fastp { Boolean phred64 = false Boolean use_all_cores = false Int first_n_reads = 0 - Int duplicate_accuracy = if deduplicate then 3 else 1 + Int duplicate_accuracy = if deduplicate + then 3 + else 1 Int n_base_limit = 5 Int qualified_quality = 15 Int unqualified_percent = 40 @@ -148,9 +148,9 @@ task fastp { Float input_size = size(read_one_fastq, "GB") + size(read_two_fastq, "GB") Int disk_size_gb = ceil(input_size) * 2 + 10 + modify_disk_size_gb - command <<< + command <<< set -euo pipefail - + n_cores=~{ncpu} if ~{use_all_cores}; then n_cores=$(nproc) @@ -159,41 +159,71 @@ task fastp { fastp \ -i "~{read_one_fastq}" \ ~{"-I '" + read_two_fastq + "'"} \ - ~{( - if output_fastq - then "-o '" + ( - if defined(read_two_fastq) + ~{(if output_fastq + then "-o '" + (if defined(read_two_fastq) then "~{prefix}.R1.fastq.gz" else "~{prefix}.fastq.gz" ) + "'" else "" )} \ - ~{( - if (defined(read_two_fastq) && output_fastq) + ~{(if (defined(read_two_fastq) && output_fastq) then "-O '" + prefix + ".R2.fastq.gz'" else "" )} \ --reads_to_process ~{first_n_reads} \ - ~{if deduplicate then "--dedup" else ""} \ + ~{if deduplicate + then "--dedup" + else "" + } \ --dup_calc_accuracy ~{duplicate_accuracy} \ - ~{if disable_duplicate_eval then "--dont_eval_duplication" else ""} \ - ~{if phred64 then "--phred64" else ""} \ - ~{if disable_quality_filter then "--disable_quality_filtering" else ""} \ + ~{if disable_duplicate_eval + then "--dont_eval_duplication" + else "" + } \ + ~{if phred64 + then "--phred64" + else "" + } \ + ~{if disable_quality_filter + then "--disable_quality_filtering" + else "" + } \ -n ~{n_base_limit} \ -q ~{qualified_quality} \ -u ~{unqualified_percent} \ -e ~{average_quality} \ - ~{if disable_length_filter then "--disable_length_filtering" else ""} \ + ~{if disable_length_filter + then "--disable_length_filtering" + else "" + } \ -l ~{length_required} \ --length_limit ~{length_limit} \ - ~{if enable_complexity_filter then "-y" else ""} \ + ~{if enable_complexity_filter + then "-y" + else "" + } \ -Y ~{complexity_threshold} \ - ~{if enable_overrepresentation_eval then "-p" else ""} \ + ~{if enable_overrepresentation_eval + then "-p" + else "" + } \ -P ~{overrepresentation_sampling} \ - ~{if disable_adapter_trimming then "--disable_adapter_trimming" else ""} \ - ~{if enable_pe_adapter_trimming then "-2" else ""} \ - ~{if allow_gap_overlap_trimming then "--allow_gap_overlap_trimming" else ""} \ - ~{if enable_base_correction then "-c" else ""} \ + ~{if disable_adapter_trimming + then "--disable_adapter_trimming" + else "" + } \ + ~{if enable_pe_adapter_trimming + then "-2" + else "" + } \ + ~{if allow_gap_overlap_trimming + then "--allow_gap_overlap_trimming" + else "" + } \ + ~{if enable_base_correction + then "-c" + else "" + } \ --overlap_len_require ~{overlap_len_require} \ --overlap_diff_limit ~{overlap_diff_limit} \ --overlap_diff_percent_limit ~{overlap_diff_percent_limit} \ @@ -219,8 +249,7 @@ task fastp { runtime { cpu: ncpu - memory: ( - if disable_duplicate_eval + memory: (if disable_duplicate_eval then "4 GB" else dup_acc_to_mem[duplicate_accuracy] ) diff --git a/tools/fastqc.wdl 
b/tools/fastqc.wdl index cf6fe238c..06aee4034 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - version 1.1 task fastqc { diff --git a/tools/fq.wdl b/tools/fq.wdl index e5eab4763..b353597f2 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjude-rust-labs/fq) - version 1.1 task fqlint { @@ -67,9 +66,7 @@ task fqlint { Float read1_size = size(read_one_fastq, "GiB") Float read2_size = size(read_two_fastq, "GiB") - Int memory_gb = ( - ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb - ) + Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 1 + modify_memory_gb) Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb @@ -78,7 +75,10 @@ task fqlint { ~{sep(" ", prefix("--disable-validator ", squote(disable_validator_codes)))} \ --single-read-validation-level "~{single_read_validation_level}" \ --paired-read-validation-level "~{paired_read_validation_level}" \ - --lint-mode ~{if panic then "panic" else "log"} \ + --lint-mode ~{if panic + then "panic" + else "log" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> @@ -123,9 +123,7 @@ task subsample { input { File read_one_fastq File? read_two_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Float probability = 1.0 @@ -138,12 +136,13 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = ( - if (probability < 1.0 && probability > 0) + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "" ) - String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" + String record_count_arg = if (record_count > 0) + then "-n ~{record_count}" + else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" String r2_dst = prefix + ".R2.subsampled.fastq.gz" @@ -154,7 +153,10 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst "~{r1_dst}" \ - ~{if defined(read_two_fastq) then "--r2-dst '" + r2_dst + "'" else ""} \ + ~{if defined(read_two_fastq) + then "--r2-dst '" + r2_dst + "'" + else "" + } \ "~{read_one_fastq}" \ ~{"'" + read_two_fastq + "'"} >>> diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index fe0bac91f..e4a709e18 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://software.broadinstitute.org/gatk) - version 1.1 task split_n_cigar_reads { @@ -13,7 +12,7 @@ task split_n_cigar_reads { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file to with unsplit reads containing Ns in their CIGAR strings." bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format. Must be uncompressed." 
@@ -37,23 +36,21 @@ task split_n_cigar_reads { Int ncpu = 8 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< - set -euo pipefail - - gatk \ - --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ - SplitNCigarReads \ - -R "~{fasta}" \ - -I "~{bam}" \ - -O "~{prefix}.bam" \ - -OBM true - # GATK is unreasonable and uses the plain ".bai" suffix. - mv "~{prefix}.bai" "~{prefix}.bam.bai" + set -euo pipefail + + gatk \ + --java-options "-Xms4000m -Xmx~{java_heap_size}g" \ + SplitNCigarReads \ + -R "~{fasta}" \ + -I "~{bam}" \ + -O "~{prefix}.bam" \ + -OBM true + # GATK is unreasonable and uses the plain ".bai" suffix. + mv "~{prefix}.bai" "~{prefix}.bam.bai" >>> output { @@ -76,11 +73,11 @@ task base_recalibrator { description: "Generates recalibration report for base quality score recalibration." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360036897372-BaseRecalibratorSpark-BETA" outputs: { - recalibration_report: "Recalibration report file" + recalibration_report: "Recalibration report file", } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to recabilbrate base quality scores" bam_index: "BAM index file corresponding to the input BAM" fasta: "Reference genome in FASTA format" @@ -114,23 +111,22 @@ task base_recalibrator { Int memory_gb = 25 Int modify_disk_size_gb = 0 Int ncpu = 4 - } + } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth command <<< # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ + java_heap_size + }g" \ BaseRecalibratorSpark \ -R "~{fasta}" \ -I "~{bam}" \ - ~{( - if use_original_quality_scores + ~{(if use_original_quality_scores then "--use-original-qualities" else "" )} \ @@ -163,7 +159,7 @@ task apply_bqsr { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to apply base quality score recalibration" bam_index: "BAM index file corresponding to the input BAM" recalibration_report: "Recalibration report file" @@ -194,11 +190,16 @@ task apply_bqsr { # shellcheck disable=SC2102 gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ + java_heap_size + }g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I "~{bam}" \ - ~{if use_original_quality_scores then "--use-original-qualities" else "" } \ + ~{if use_original_quality_scores + then "--use-original-qualities" + else "" + } \ -O "~{prefix}.bqsr.bam" \ --bqsr-recal-file "~{recalibration_report}" >>> @@ -227,7 +228,7 @@ task haplotype_caller { } } - parameter_meta { + parameter_meta { bam: "Input BAM format file on which to call variants" bam_index: "BAM index file corresponding to the input BAM" interval_list: { @@ -269,10 +270,7 @@ task haplotype_caller { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") * 2) - + 30 - + ceil(size(fasta, "GB")) - + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) 
+ modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth @@ -284,7 +282,10 @@ task haplotype_caller { -I "~{bam}" \ -L "~{interval_list}" \ -O "~{prefix}.vcf.gz" \ - ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \ + ~{if use_soft_clipped_bases + then "" + else "--dont-use-soft-clipped-bases" + } \ --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \ --dbsnp "~{dbSNP_vcf}" >>> @@ -313,7 +314,7 @@ task variant_filtration { } } - parameter_meta { + parameter_meta { vcf: "Input VCF format file to filter" vcf_index: "VCF index file corresponding to the input VCF" fasta: "Reference genome in FASTA format" @@ -340,8 +341,14 @@ task variant_filtration { File fasta File fasta_index File dict - Array[String] filter_names = ["FS", "QD"] - Array[String] filter_expressions = ["FS > 30.0", "QD < 2.0"] + Array[String] filter_names = [ + "FS", + "QD", + ] + Array[String] filter_expressions = [ + "FS > 30.0", + "QD < 2.0", + ] String prefix = basename(vcf, ".vcf.gz") Int cluster = 3 Int window = 35 @@ -377,7 +384,7 @@ task variant_filtration { } task mark_duplicates_spark { - meta { + meta { description: "Marks duplicate reads in the input BAM file using GATK's Spark implementation of Picard's MarkDuplicates." external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832682540699-MarkDuplicatesSpark" outputs: { @@ -427,7 +434,7 @@ task mark_duplicates_spark { group: "Common", } optical_distance: { - description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", + description: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled.", help: "Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). 
Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines.", warning: "Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`.", } @@ -452,13 +459,10 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -470,12 +474,16 @@ task mark_duplicates_spark { --java-options "-Xmx~{java_heap_size}g" \ -I "~{bam}" \ -M "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --create-output-bam-index ~{create_bam} \ --read-validation-stringency "~{validation_stringency}" \ --duplicate-scoring-strategy "~{duplicate_scoring_strategy}" \ - --read-name-regex '~{ - if (optical_distance > 0) then read_name_regex else "null" + --read-name-regex '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --duplicate-tagging-policy "~{tagging_policy}" \ --optical-duplicate-pixel-distance ~{optical_distance} \ diff --git a/tools/htseq.wdl b/tools/htseq.wdl index bcaca9ce0..cb8b16014 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/htseq/htseq) - version 1.1 task count { @@ -9,7 +8,7 @@ task count { feature_counts: { description: "A two column TSV file. First column is feature names and second column is counts.", help: "Presence of a header is determined by the `include_custom_header` parameter.", - } + }, } } @@ -89,10 +88,14 @@ task count { Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") - Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb + Int memory_gb = (if pos_sorted + then ceil(bam_size) + 4 + else 4 + ) + modify_memory_gb - Int disk_size_gb = ceil( - (bam_size + gtf_size) * if pos_sorted then 4 else 1 + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted + then 4 + else 1 ) + 10 + modify_disk_size_gb command <<< @@ -105,16 +108,24 @@ task count { # 9223372036854776000 == max 64 bit Float htseq-count -f bam \ --max-reads-in-buffer 9223372036854776000 \ - -r ~{if pos_sorted then "pos" else "name"} \ + -r ~{if pos_sorted + then "pos" + else "name" + } \ -s "~{strandedness}" \ -a ~{minaqual} \ -t "~{feature_type}" \ -m "~{mode}" \ -i "~{idattr}" \ - --nonunique ~{if nonunique then "all" else "none"} \ - --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ - --supplementary-alignments ~{( - if supplementary_alignments + --nonunique ~{if nonunique + then "all" + else "none" + } \ + --secondary-alignments ~{if secondary_alignments + then "score" + else "ignore" + } \ + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore" )} \ @@ -139,7 +150,7 @@ task calc_tpm { meta { description: "Given a feature counts file and a feature lengths file, calculate Transcripts Per Million (TPM)" outputs: { - tpm_file: "Transcripts Per Million (TPM) file. A two column headered TSV file." + tpm_file: "Transcripts Per Million (TPM) file. 
A two column headered TSV file.", } } @@ -171,7 +182,10 @@ task calc_tpm { "~{counts}" \ "~{feature_lengths}" \ "~{outfile_name}" \ - ~{if has_header then "--counts_has_header" else ""} + ~{if has_header + then "--counts_has_header" + else "" + } >>> output { diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index b501ffa48..5d2081918 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/DerrickWood/kraken2) - version 1.1 task download_taxonomy { @@ -9,7 +8,7 @@ task download_taxonomy { taxonomy: { description: "The NCBI taxonomy, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -27,7 +26,10 @@ task download_taxonomy { set -euo pipefail kraken2-build --download-taxonomy \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -58,7 +60,7 @@ task download_library { library: { description: "A library of reference genomes, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -97,21 +99,24 @@ task download_library { String db_name = "kraken2_" + library_name + "_library" #@ except: ExpressionSpacing - Int disk_size_gb = ( - ( - if library_name == "bacteria" then 300 - else if library_name == "nr" then 600 - else if library_name == "nt" then 2500 - else 25 - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if library_name == "bacteria" + then 300 + else if library_name == "nr" + then 600 + else if library_name == "nt" + then 2500 + else 25 + ) + modify_disk_size_gb) command <<< set -euo pipefail kraken2-build --download-library \ "~{library_name}" \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --use-ftp \ --db "~{db_name}" 2>&1 \ | awk '/gunzip:/ { print; exit 42 } !/gunzip:/ { print }' 1>&2 @@ -140,7 +145,7 @@ task create_library_from_fastas { custom_library: { description: "Kraken2 compatible library, which is needed by the `build_db` task.", warning: "This output is not human-readable or meant for anything other than building a Kraken2 database.", - } + }, } } @@ -172,7 +177,10 @@ task create_library_from_fastas { while read -r fasta; do gunzip -c "$fasta" > tmp.fa kraken2-build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --add-to-library tmp.fa \ --db "~{db_name}" done < fastas.txt @@ -200,7 +208,7 @@ task build_db { meta { description: "Builds a custom Kraken2 database" outputs: { - built_db: "A complete Kraken2 database" + built_db: "A complete Kraken2 database", } } @@ -238,9 +246,15 @@ task build_db { String db_name = "kraken2_db" Boolean protein = false Boolean use_all_cores = false - Int kmer_len = if protein then 15 else 35 - Int minimizer_len = if protein then 12 else 31 - Int minimizer_spaces = if protein then 0 else 7 + Int kmer_len = if protein + then 15 + else 35 + Int minimizer_len = if protein + then 12 + else 31 + Int minimizer_spaces = if protein + then 0 + else 7 Int max_db_size_gb = -1 Int ncpu = 4 Int modify_memory_gb = 0 @@ -249,13 +263,10 @@ task build_db { Float tarballs_size = size(tarballs, "GiB") Int disk_size_gb = ceil(tarballs_size * 6) + 10 + modify_disk_size_gb - Int memory_gb = ( - ( - if (max_db_size_gb > 0) - then ceil(max_db_size_gb * 1.2) - 
else ceil(tarballs_size * 2) - ) + modify_memory_gb - ) + Int memory_gb = ((if (max_db_size_gb > 0) + then ceil(max_db_size_gb * 1.2) + else ceil(tarballs_size * 2) + ) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -277,12 +288,14 @@ task build_db { >&2 echo "*** start DB build ***" kraken2-build --build \ - ~{if protein then "--protein" else ""} \ + ~{if protein + then "--protein" + else "" + } \ --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{( - if (max_db_size_gb > 0) + ~{(if (max_db_size_gb > 0) then "--max-db-size '" + max_db_size_bytes + "'" else "" )} \ @@ -359,9 +372,7 @@ task kraken { File read_two_fastq_gz #@ except: InputName File db - String prefix = sub( - basename(read_one_fastq_gz), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq_gz), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) Boolean store_sequences = false @@ -376,11 +387,9 @@ task kraken { Float db_size = size(db, "GiB") Float read1_size = size(read_one_fastq_gz, "GiB") Float read2_size = size(read_two_fastq_gz, "GiB") - Int disk_size_gb_calculation = ( - ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb + Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb ) - Int disk_size_gb = ( - if store_sequences + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation ) @@ -403,12 +412,18 @@ task kraken { kraken2 --db kraken2_db/ \ --paired \ - --output ~{if store_sequences then "'" + out_sequences + "'" else "-"} \ + --output ~{if store_sequences + then "'" + out_sequences + "'" + else "-" + } \ --threads "$n_cores" \ --minimum-base-quality ~{min_base_quality} \ --report "~{out_report}" \ --report-zero-counts \ - ~{if use_names then "--use-names" else ""} \ + ~{if use_names + then "--use-names" + else "" + } \ "~{read_one_fastq_gz}" \ "~{read_two_fastq_gz}" diff --git a/tools/librarian.wdl b/tools/librarian.wdl index faa136618..88b2fd078 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -1,5 +1,4 @@ ## # librarian - version 1.1 task librarian { @@ -24,18 +23,14 @@ task librarian { input { File read_one_fastq - String prefix = sub( - basename(read_one_fastq), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastq), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) + ".librarian" Int modify_disk_size_gb = 0 } Float read1_size = size(read_one_fastq, "GiB") - Int disk_size_gb = ( - ceil(read1_size) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/md5sum.wdl b/tools/md5sum.wdl index e967e55c3..1e79260a0 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/coreutils/coreutils) - version 1.1 task compute_checksum { meta { description: "Generates an MD5 checksum for the input file" outputs: { - md5sum: "STDOUT of the `md5sum` command that has been redirected to a file" + md5sum: "STDOUT of the `md5sum` command that has been redirected to a file", } } diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl 
b/tools/mosdepth.wdl
index 69b81d1ac..fdf4775c9 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/brentp/mosdepth) - version 1.1 task coverage { @@ -53,7 +52,10 @@ task coverage { -n \ ~{"-b '" + coverage_bed + "'"} \ -Q ~{min_mapping_quality} \ - ~{if (use_fast_mode) then "-x" else ""} \ + ~{if (use_fast_mode) + then "-x" + else "" + } \ "~{prefix}" \ "$CWD_BAM" diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 3abaac343..72cb92ced 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://github.com/stjudecloud/ngsderive) - version 1.1 task strandedness { @@ -61,7 +60,10 @@ task strandedness { ln -s "~{gene_model}" "$CWD_GFF" ngsderive strandedness --verbose \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ -m ~{min_reads_per_gene} \ -n ~{num_genes} \ -q ~{min_mapq} \ @@ -398,21 +400,30 @@ task endedness { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if calc_rpt - then ( - ceil(bam_size * 2.5) + 4 + modify_memory_gb - ) + Int memory_gb = (if calc_rpt + then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4 ) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< ngsderive endedness --verbose \ - ~{if lenient then "--lenient" else ""} \ - ~{if calc_rpt then "-r" else ""} \ - ~{if round_rpt then "--round-rpt" else ""} \ - ~{if split_by_rg then "--split-by-rg" else ""} \ + ~{if lenient + then "--lenient" + else "" + } \ + ~{if calc_rpt + then "-r" + else "" + } \ + ~{if round_rpt + then "--round-rpt" + else "" + } \ + ~{if split_by_rg + then "--split-by-rg" + else "" + } \ --paired-deviance ~{paired_deviance} \ -n ~{num_reads} \ "~{bam}" \ diff --git a/tools/picard.wdl b/tools/picard.wdl index 8f35947d1..3c8f7d4ce 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,5 +1,4 @@ ## [Homepage](https://broadinstitute.github.io/picard/) - version 1.1 task mark_duplicates { @@ -84,13 +83,10 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ( - ( - if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -100,13 +96,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ + -O "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{ - if (optical_distance > 0) then read_name_regex else "null" + --READ_NAME_REGEX '~{if (optical_distance > 0) + then read_name_regex + else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,9 +194,10 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" - String stringency_arg = ( - if (index_validation_stringency_less_exhaustive) + String mode_arg = if (summary_mode) + then "--MODE SUMMARY" + else "" + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" ) 
@@ -418,8 +419,10 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime{ - cpu: if threading then 2 else 1 + runtime { + cpu: if threading + then 2 + else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -504,7 +507,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - } + }, } } @@ -851,8 +854,7 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{( - if paired + ~{(if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" )} \ @@ -860,7 +862,10 @@ task bam_to_fastq { VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired then "'" + prefix + ".R2.fastq'" else ""} + ~{if paired + then "'" + prefix + ".R2.fastq'" + else "" + } >>> output { @@ -868,7 +873,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime{ + runtime { memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -932,7 +937,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -999,7 +1004,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", } } diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index 67cd89d14..4aec8447d 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://qualimap.bioinfo.cipf.es/) - version 1.1 task rnaseq { @@ -41,8 +40,12 @@ task rnaseq { } String out_tar_gz = prefix + ".tar.gz" - String name_sorted_arg = if (name_sorted) then "-s" else "" - String paired_end_arg = if (paired_end) then "-pe" else "" + String name_sorted_arg = if (name_sorted) + then "-s" + else "" + String paired_end_arg = if (paired_end) + then "-pe" + else "" Int java_heap_size = ceil(memory_gb * 0.9) Float bam_size = size(bam, "GiB") @@ -50,13 +53,10 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. 
- Int disk_size_gb = ( - ( - if name_sorted - then ceil(bam_size + gtf_size + 15) - else ceil(((bam_size + gtf_size) * 12) + 10) - ) + modify_disk_size_gb - ) + Int disk_size_gb = ((if name_sorted + then ceil(bam_size + gtf_size + 15) + else ceil(((bam_size + gtf_size) * 12) + 10) + ) + modify_disk_size_gb) command <<< set -euo pipefail @@ -81,8 +81,7 @@ task rnaseq { output { File raw_summary = "~{prefix}/rnaseq_qc_results.txt" - File raw_coverage - = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" + File raw_coverage = "~{prefix}/raw_data_qualimapReport/coverage_profile_along_genes_(total).txt" File results = out_tar_gz } diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index 726b4714f..ac1fe349b 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://lomereiter.github.io/sambamba/) - version 1.1 task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -63,7 +62,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -123,7 +122,7 @@ task sort { meta { description: "Sorts the input BAM file" outputs: { - sorted_bam: "The input BAM after it has been sorted according to `sort_order`" + sorted_bam: "The input BAM after it has been sorted according to `sort_order`", } } @@ -157,7 +156,10 @@ task sort { sambamba sort \ --nthreads ~{ncpu} \ -o "~{outfile_name}" \ - ~{if queryname_sort then "-n" else ""} \ + ~{if queryname_sort + then "-n" + else "" + } \ "~{bam}" >>> @@ -209,7 +211,10 @@ task markdup { command <<< sambamba markdup \ --nthreads ~{ncpu} \ - ~{if remove_duplicates then "--remove-duplicates" else ""} \ + ~{if remove_duplicates + then "--remove-duplicates" + else "" + } \ "~{bam}" \ "~{prefix}.markdup.bam" \ > "~{prefix}.markdup_log.txt" @@ -234,7 +239,7 @@ task flagstat { meta { description: "Produces a report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`sambamba flagstat` STDOUT redirected to a file" + flagstat_report: "`sambamba flagstat` STDOUT redirected to a file", } } @@ -275,7 +280,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { diff --git a/tools/samtools.wdl b/tools/samtools.wdl index 21a8cfd46..b75864496 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -1,5 +1,4 @@ ## [Homepage](http://samtools.sourceforge.net/) - version 1.1 import "../data_structures/flag_filter.wdl" @@ -38,7 +37,7 @@ task split { meta { description: "Runs Samtools split on the input BAM file. This splits the BAM by read group into one or more output files." outputs: { - split_bams: "The split BAM files. The extensions will contain read group IDs, and will end in `.bam`." + split_bams: "The split BAM files. 
The extensions will contain read group IDs, and will end in `.bam`.", } } @@ -131,12 +130,12 @@ task split { rm first_read.sam done fi - + exit $EXITCODE >>> output { - Array[File] split_bams = glob("*.bam") + Array[File] split_bams = glob("*.bam") } runtime { @@ -152,7 +151,7 @@ task flagstat { meta { description: "Produces a `samtools flagstat` report containing statistics about the alignments based on the bit flags set in the BAM" outputs: { - flagstat_report: "`samtools flagstat` STDOUT redirected to a file" + flagstat_report: "`samtools flagstat` STDOUT redirected to a file", } } @@ -195,7 +194,7 @@ task flagstat { >>> output { - File flagstat_report = outfile_name + File flagstat_report = outfile_name } runtime { @@ -210,7 +209,7 @@ task index { meta { description: "Creates a `.bai` BAM index for the input BAM" outputs: { - bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`." + bam_index: "A `.bai` BAM index associated with the input BAM. Filename will be `basename(bam) + '.bai'`.", } } @@ -397,7 +396,6 @@ task subsample { fi rm first_read.sam fi - >>> output { @@ -419,7 +417,7 @@ task filter { description: "Filters a BAM based on its bitwise flag value." help: "This task is a wrapper around `samtools view`. This task will fail if there are no reads in the output BAM. This can happen either because the input BAM was empty or because the supplied `bitwise_filter` was too strict. If you want to down-sample a BAM, use the `subsample` task instead." outputs: { - filtered_bam: "BAM file that has been filtered based on the input flags" + filtered_bam: "BAM file that has been filtered based on the input flags", } } @@ -505,7 +503,7 @@ task merge { meta { description: "Merges multiple sorted BAMs into a single BAM" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } } @@ -584,11 +582,26 @@ task merge { samtools merge \ --threads "$n_cores" \ ~{"-h \"" + new_header + "\""} \ - ~{if name_sorted then "-n" else ""} \ - ~{if (region != "") then "-R \"" + region + "\"" else ""} \ - ~{if attach_rg then "-r" else ""} \ - ~{if combine_rg then "-c" else ""} \ - ~{if combine_pg then "-p" else ""} \ + ~{if name_sorted + then "-n" + else "" + } \ + ~{if (region != "") + then "-R \"" + region + "\"" + else "" + } \ + ~{if attach_rg + then "-r" + else "" + } \ + ~{if combine_rg + then "-c" + else "" + } \ + ~{if combine_pg + then "-p" + else "" + } \ "~{prefix}.bam" \ "${bams[@]}" @@ -613,7 +626,7 @@ task addreplacerg { meta { description: "Adds or replaces read group tags" outputs: { - tagged_bam: "The transformed input BAM after read group modifications have been applied" + tagged_bam: "The transformed input BAM after read group modifications have been applied", } } @@ -677,8 +690,14 @@ task addreplacerg { --threads "$n_cores" \ ~{sep(" ", prefix("-r ", squote(read_group_line)))} \ ~{"-R \"" + read_group_id + "\""} \ - -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \ - ~{if overwrite_header_record then "-w" else ""} \ + -m ~{if orphan_only + then "orphan_only" + else "overwrite_all" + } \ + ~{if overwrite_header_record + then "-w" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -700,7 +719,7 @@ task collate { meta { description: "Runs `samtools collate` on the input BAM file. Shuffles and groups reads together by their names." 
outputs: { - collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)" + collated_bam: "A collated BAM (reads sharing a name next to each other, no other guarantee of sort order)", } } @@ -751,7 +770,10 @@ task collate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -o "~{outfile_name}" \ "~{bam}" >>> @@ -854,13 +876,12 @@ task bam_to_fastq { } Float bam_size = size(bam, "GiB") - Int memory_gb = ( - if (collated || !paired_end) + Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4) ) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * ( - if (retain_collated_bam && !collated && paired_end) + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end + ) then 5 else 2 )) + 10 + modify_disk_size_gb @@ -878,13 +899,18 @@ task bam_to_fastq { mkfifo bam_pipe if ! ~{collated} && ~{paired_end}; then samtools collate \ - ~{if retain_collated_bam then "" else "-u"} \ + ~{if retain_collated_bam + then "" + else "-u" + } \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -O \ "~{bam}" \ - | tee ~{( - if retain_collated_bam + | tee ~{(if retain_collated_bam then "\"" + prefix + ".collated.bam\"" else "" )} \ @@ -900,32 +926,26 @@ task bam_to_fastq { -F "~{bitwise_filter.exclude_if_any}" \ --rf "~{bitwise_filter.include_if_any}" \ -G "~{bitwise_filter.exclude_if_all}" \ - ~{( - if append_read_number + ~{(if append_read_number then "-N" else "-n" )} \ - -1 ~{( - if paired_end + -1 ~{(if paired_end then "\"" + prefix + ".R1.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - -2 ~{( - if paired_end + -2 ~{(if paired_end then "\"" + prefix + ".R2.fastq.gz\"" else "\"" + prefix + ".fastq.gz\"" )} \ - ~{( - if paired_end - then ( - if output_singletons + ~{(if paired_end + then (if output_singletons then "-s \"" + prefix + ".singleton.fastq.gz\"" else "-s junk.singleton.fastq.gz" ) else "" )} \ - -0 ~{( - if paired_end + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else "\"" + prefix + ".fastq.gz\"" )} \ @@ -971,7 +991,7 @@ task fixmate { description: "Runs `samtools fixmate` on the input BAM file. This fills in mate coordinates and insert size fields among other tags and fields." warning: "This task assumes a name-sorted or name-collated input BAM. If you have a position-sorted BAM, please use the `position_sorted_fixmate` task." outputs: { - fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM" + fixmate_bam: "The BAM resulting from running `samtools fixmate` on the input BAM", } } @@ -1042,11 +1062,26 @@ task fixmate { samtools fixmate \ --threads "$n_cores" \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ "~{bam}" \ "~{prefix}~{extension}" >>> @@ -1070,7 +1105,7 @@ task position_sorted_fixmate { warning: "If you already have a collated BAM, please use the `fixmate` task." 
help: "`fixmate` fills in mate coordinates and insert size fields among other tags and fields. This task collates the input BAM, runs `fixmate`, and then resorts the output into a position-sorted BAM." outputs: { - fixmate_bam: "BAM file with mate information added" + fixmate_bam: "BAM file with mate information added", } } @@ -1137,18 +1172,36 @@ task position_sorted_fixmate { samtools collate \ --threads "$n_cores" \ - ~{if fast_mode then "-f" else ""} \ + ~{if fast_mode + then "-f" + else "" + } \ -u \ -O \ "~{bam}" \ | samtools fixmate \ --threads "$n_cores" \ -u \ - ~{if remove_unaligned_and_secondary then "-r" else ""} \ - ~{if disable_proper_pair_check then "-p" else ""} \ - ~{if add_cigar then "-c" else ""} \ - ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" else ""} \ + ~{if remove_unaligned_and_secondary + then "-r" + else "" + } \ + ~{if disable_proper_pair_check + then "-p" + else "" + } \ + ~{if add_cigar + then "-c" + else "" + } \ + ~{if add_mate_score + then "-m" + else "" + } \ + ~{if disable_flag_sanitization + then "-z off" + else "" + } \ - \ - \ | samtools sort \ @@ -1279,25 +1332,54 @@ task markdup { samtools markdup \ --threads "$n_cores" \ - -f "~{prefix + if json then ".json" else ".txt"}" \ + -f "~{prefix + if json + then ".json" + else ".txt" + }" \ --read-coords '~{read_coords_regex}' \ --coords-order "~{coordinates_order}" \ - ~{if remove_duplicates then "-r" else ""} \ - ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \ - ~{if mark_duplicates_with_do_tag then "-t" else ""} \ - ~{if duplicate_count then "--duplicate-count" else ""} \ - ~{if include_qc_fails then "--include-fails" else ""} \ - ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \ - ~{if use_read_groups then "--use-read-groups" else ""} \ + ~{if remove_duplicates + then "-r" + else "" + } \ + ~{if mark_supp_or_sec_or_unmapped_as_duplicates + then "-S" + else "" + } \ + ~{if mark_duplicates_with_do_tag + then "-t" + else "" + } \ + ~{if duplicate_count + then "--duplicate-count" + else "" + } \ + ~{if include_qc_fails + then "--include-fails" + else "" + } \ + ~{if duplicates_of_duplicates_check + then "" + else "--no-multi-dup" + } \ + ~{if use_read_groups + then "--use-read-groups" + else "" + } \ -l ~{max_readlen} \ -d ~{optical_distance} \ -c \ "~{bam}" \ - "~{if create_bam then prefix + ".bam" else "/dev/null"}" + "~{if create_bam + then prefix + ".bam" + else "/dev/null" + }" >>> output { - File markdup_report = prefix + if json then ".json" else ".txt" + File markdup_report = prefix + if json + then ".json" + else ".txt" File? markdup_bam = prefix + ".bam" } @@ -1314,7 +1396,7 @@ task faidx { meta { description: "Creates a `.fai` FASTA index for the input FASTA" outputs: { - fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`." + fasta_index: "A `.fai` FASTA index associated with the input FASTA. Filename will be `basename(fasta) + '.fai'`.", } } diff --git a/tools/star.wdl b/tools/star.wdl index 47fd141e7..8b77ced1b 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -1,12 +1,11 @@ ## [Homepage](https://github.com/alexdobin/STAR) - version 1.1 task build_star_db { meta { description: "Runs STAR's build command to generate a STAR format reference for alignment" outputs: { - star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task." 
+ star_db: "A gzipped TAR file containing the STAR reference files. Suitable as the `star_db_tar_gz` input to the `alignment` task.", } } @@ -86,8 +85,7 @@ task build_star_db { Float reference_fasta_size = size(reference_fasta, "GiB") Float gtf_size = size(gtf, "GiB") - Int disk_size_gb = ( - ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb + Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb ) # Leave 2GB as system overhead @@ -558,7 +556,11 @@ task alignment { Array[File] read_one_fastqs_gz Array[String] read_groups Array[File]? read_two_fastqs_gz - Array[Int] out_sj_filter_intron_max_vs_read_n = [50000, 100000, 200000] + Array[Int] out_sj_filter_intron_max_vs_read_n = [ + 50000, + 100000, + 200000, + ] SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { noncanonical_motifs: 30, GT_AG_and_CT_AC_motif: 12, @@ -595,9 +597,7 @@ task alignment { Pair[Int, Int] clip_3p_n_bases = (0, 0) Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0) Pair[Int, Int] clip_5p_n_bases = (0, 0) - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String read_name_separator = "/" @@ -699,16 +699,16 @@ task alignment { Int modify_disk_size_gb = 0 } - Array[File] read_twos = select_first([read_two_fastqs_gz, []]) + Array[File] read_twos = select_first([ + read_two_fastqs_gz, + [], + ]) Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB") Float read_two_fastqs_size = size(read_twos, "GiB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GiB") - Int disk_size_gb = ( - ( - ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3 - ) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size + ) * 3) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -733,9 +733,9 @@ task alignment { --outFileNamePrefix "~{prefix + "."}" \ --twopassMode "~{twopass_mode}" \ --outSAMattrRGline ~{sep(" , ", read_groups)} \ - --outSJfilterIntronMaxVsReadN ~{ - sep(" ", quote(out_sj_filter_intron_max_vs_read_n)) - } \ + --outSJfilterIntronMaxVsReadN ~{sep(" ", quote( + out_sj_filter_intron_max_vs_read_n + ))} \ --outSJfilterOverhangMin ~{sep(" ", quote([ out_sj_filter_overhang_min.noncanonical_motifs, out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif, @@ -766,33 +766,31 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{( - if (length(read_twos) != 0) + --clip3pAdapterSeq "~{clip_3p_adapter_seq.left}" ~{(if (length(read_twos) != 0 + ) then "'" + clip_3p_adapter_seq.right + "'" else "" )} \ - --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{( - if (length(read_twos) != 0) + --clip3pAdapterMMp ~{clip_3p_adapter_mmp.left} ~{(if (length(read_twos) != 0) then clip_3p_adapter_mmp.right else None )} \ - --alignEndsProtrude ~{align_ends_protrude.left} "~{( - if (length(read_twos) != 0) + --alignEndsProtrude ~{align_ends_protrude.left} "~{(if (length(read_twos) != 0 + ) then align_ends_protrude.right else None )}" \ - --clip3pNbases ~{clip_3p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pNbases ~{clip_3p_n_bases.left} ~{(if 
(length(read_twos) != 0) then clip_3p_n_bases.right else None )} \ - --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip3pAfterAdapterNbases ~{clip_3p_after_adapter_n_bases.left} ~{(if (length( + read_twos + ) != 0) then clip_3p_after_adapter_n_bases.right else None )} \ - --clip5pNbases ~{clip_5p_n_bases.left} ~{( - if (length(read_twos) != 0) + --clip5pNbases ~{clip_5p_n_bases.left} ~{(if (length(read_twos) != 0) then clip_5p_n_bases.right else None )} \ @@ -803,11 +801,11 @@ task alignment { --outSAMunmapped "~{out_sam_unmapped}" \ --outSAMorder "~{out_sam_order}" \ --outSAMreadID "~{out_sam_read_id}" \ - --outSAMtlen ~{( - if (out_sam_tlen == "left_plus") + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" - else ( - if (out_sam_tlen == "left_any") then "2" else "error" + else (if (out_sam_tlen == "left_any") + then "2" + else "error" ) )} \ --outFilterType "~{out_filter_type}" \ diff --git a/tools/util.wdl b/tools/util.wdl index 3bb4c5963..be141df2c 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -1,12 +1,11 @@ ## # Utilities - version 1.1 task download { meta { description: "Uses wget to download a file from a remote URL to the local filesystem" outputs: { - downloaded_file: "File downloaded from provided URL" + downloaded_file: "File downloaded from provided URL", } } @@ -53,7 +52,7 @@ task split_string { description: "Split a string into an array of strings based on a delimiter" warning: "This implementation will result in a runtime error if the provided string has any embedded single quotes (`'`)!" outputs: { - split_strings: "Split string as an array" + split_strings: "Split string as an array", } } @@ -90,7 +89,7 @@ task calc_feature_lengths { description: "Calculate feature lengths from a GTF file using the non-overlapping exonic length algorithm" help: "The non-overlapping exonic length algorithm can be implemented as the sum of each base covered by at least one exon; where each base is given a value of 1 regardless of how many exons overlap it." outputs: { - feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column" + feature_lengths: "A two column headered TSV file with feature names in the first column and feature lengths (as integers) in the second column", } } @@ -166,7 +165,7 @@ task add_to_bam_header { meta { description: "Adds another line of text to the bottom of a BAM header" outputs: { - reheadered_bam: "The BAM after its header has been modified" + reheadered_bam: "The BAM after its header has been modified", } } @@ -215,7 +214,7 @@ task unpack_tarball { meta { description: "Accepts a `.tar.gz` archive and converts it into a flat array of files. Any directory structure of the archive is ignored." 
outputs: { - tarball_contents: "An array of files found in the input tarball" + tarball_contents: "An array of files found in the input tarball", } } @@ -314,7 +313,7 @@ task global_phred_scores { meta { description: "Calculates statistics about PHRED scores of the input BAM" outputs: { - phred_scores: "Headered TSV file containing PHRED score statistics" + phred_scores: "Headered TSV file containing PHRED score statistics", } } @@ -339,7 +338,10 @@ task global_phred_scores { command <<< python3 /scripts/util/calc_global_phred_scores.py \ - ~{if fast_mode then "--fast_mode" else ""} \ + ~{if fast_mode + then "--fast_mode" + else "" + } \ "~{bam}" \ "~{prefix}" >>> @@ -384,13 +386,15 @@ task check_fastq_and_rg_concordance { Array[String]? read_two_names } - Array[String] read_twos = select_first([read_two_names, []]) + Array[String] read_twos = select_first([ + read_two_names, + [], + ]) command <<< python3 /scripts/util/check_FQs_and_RGs.py \ --read-one-fastqs "~{sep(",", read_one_names)}" \ - ~{( - if length(read_twos) > 0 + ~{(if length(read_twos) > 0 then "--read-two-fastqs \"" + sep(",", squote(read_twos)) + "\"" else "" )} \ @@ -407,7 +411,7 @@ task split_fastq { meta { description: "Splits a FASTQ into multiple files based on the number of reads per file" outputs: { - fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ" + fastqs: "Array of FASTQ files, each containing a subset of the input FASTQ", } } @@ -427,11 +431,7 @@ task split_fastq { input { File fastq - String prefix = sub( - basename(fastq), - "(fastq|fq)\\.gz$", - "" - ) + String prefix = sub(basename(fastq), "(fastq|fq)\\.gz$", "") Int reads_per_file = 10000000 Int modify_disk_size_gb = 0 Int ncpu = 2 diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl index d9f9290df..aa30759fe 100755 --- a/workflows/chipseq/chipseq-standard.wdl +++ b/workflows/chipseq/chipseq-standard.wdl @@ -9,11 +9,14 @@ import "../../tools/samtools.wdl" import "../../tools/util.wdl" import "../general/bam-to-fastqs.wdl" as b2fq #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map +import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" + as seaseq_map #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" + as seaseq_samtools #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util +import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" + as seaseq_util workflow chipseq_standard_experimental { meta { @@ -67,7 +70,10 @@ workflow chipseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, @@ -79,7 +85,7 @@ workflow chipseq_standard_experimental { use_all_cores, } - scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){ + scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) { if (enable_read_trimming) { call fp.fastp as trim { input: read_one_fastq = pair.left, @@ -93,10 +99,13 @@ workflow chipseq_standard_experimental { } } - File chosen_fastq = 
select_first([trim.single_end_reads_fastq_gz, pair.left]) + File chosen_fastq = select_first([ + trim.single_end_reads_fastq_gz, + pair.left, + ]) call seaseq_util.basicfastqstats as basic_stats { input: - fastqfile = chosen_fastq + fastqfile = chosen_fastq, } call seaseq_map.mapping as bowtie_single_end_mapping { input: fastqfile = chosen_fastq, @@ -104,13 +113,11 @@ workflow chipseq_standard_experimental { metricsfile = basic_stats.metrics_out, blacklist = excludelist, } - File chosen_bam = select_first( - [ - bowtie_single_end_mapping.bklist_bam, - bowtie_single_end_mapping.mkdup_bam, - bowtie_single_end_mapping.sorted_bam, - ] - ) + File chosen_bam = select_first([ + bowtie_single_end_mapping.bklist_bam, + bowtie_single_end_mapping.mkdup_bam, + bowtie_single_end_mapping.sorted_bam, + ]) call read_group.read_group_to_string { input: read_group = pair.right, @@ -127,7 +134,7 @@ workflow chipseq_standard_experimental { } Array[File] aligned_bams = addreplacerg.tagged_bam - scatter(aligned_bam in aligned_bams){ + scatter (aligned_bam in aligned_bams) { call picard.clean_sam as picard_clean { input: bam = aligned_bam, } @@ -147,7 +154,9 @@ workflow chipseq_standard_experimental { use_all_cores, } #@ except: UnusedCall - call picard.validate_bam { input: bam = markdup.mkdupbam } + call picard.validate_bam { input: + bam = markdup.mkdupbam, + } call md5sum.compute_checksum { input: file = markdup.mkdupbam, @@ -164,9 +173,13 @@ workflow chipseq_standard_experimental { File bam_checksum = compute_checksum.md5sum File bam_index = samtools_index.bam_index File bigwig = deeptools_bam_coverage.bigwig - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl index 62b027db2..21a1d3990 100644 --- a/workflows/dnaseq/dnaseq-core.wdl +++ b/workflows/dnaseq/dnaseq-core.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! 
- version 1.1 import "../../tools/bwa.wdl" @@ -69,10 +68,7 @@ workflow dnaseq_core_experimental { read_groups, } - scatter (tuple in zip( - zip(read_one_fastqs_gz, read_two_fastqs_gz), - read_groups - )) { + scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) { if (enable_read_trimming) { call fp.fastp as trim after validate { input: read_one_fastq = tuple.left.left, @@ -87,8 +83,14 @@ workflow dnaseq_core_experimental { output_fastq = enable_read_trimming, } } - File chosen_r1_fastq = select_first([trim.read_one_fastq_gz, tuple.left.left]) - File chosen_r2_fastq = select_first([trim.read_two_fastq_gz, tuple.left.right]) + File chosen_r1_fastq = select_first([ + trim.read_one_fastq_gz, + tuple.left.left, + ]) + File chosen_r2_fastq = select_first([ + trim.read_two_fastq_gz, + tuple.left.right, + ]) call util.split_fastq as read_ones after validate { input: fastq = chosen_r1_fastq, @@ -105,11 +107,8 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } @@ -119,17 +118,17 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub( - basename(t.left), - "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "" - ), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", + ""), "\\.([rR][12])\\.", "."), read_group = tuple.right, use_all_cores, } } call picard.sort as sort { input: - bam = select_first([bwa_mem.bam, bwa_aln_pe.bam]) + bam = select_first([ + bwa_mem.bam, + bwa_aln_pe.bam, + ]), } } } @@ -146,9 +145,13 @@ workflow dnaseq_core_experimental { output { File harmonized_bam = merge.merged_bam File harmonized_bam_index = index.bam_index - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index c0542c19d..fdf48606b 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! 
- version 1.1 import "../../data_structures/read_group.wdl" @@ -54,9 +53,7 @@ workflow dnaseq_standard_fastq_experimental { Array[File] read_one_fastqs_gz Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String aligner = "mem" @@ -101,12 +98,10 @@ workflow dnaseq_standard_fastq_experimental { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call dnaseq_core_wf.dnaseq_core_experimental after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 7b2e86154..194c18cea 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -1,5 +1,4 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../data_structures/read_group.wdl" @@ -55,7 +54,7 @@ workflow dnaseq_standard_experimental { } call parse_input { input: - aligner + aligner, } if (validate_input) { @@ -71,7 +70,10 @@ workflow dnaseq_standard_experimental { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after parse_input { input: bam = selected_bam, @@ -95,7 +97,10 @@ workflow dnaseq_standard_experimental { SM: sample_override, } } - ReadGroup selected_rg = select_first([overriden_rg, rg]) + ReadGroup selected_rg = select_first([ + overriden_rg, + rg, + ]) call read_group.read_group_to_string { input: read_group = selected_rg, format_as_sam_record = true, diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 53c18d64a..9caa5344c 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -4,7 +4,8 @@ import "../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf +import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" + as xenocp_wf workflow alignment_post { meta { @@ -12,7 +13,7 @@ workflow alignment_post { outputs: { processed_bam: "Input BAM after being transformed by standard processing", bam_index: "BAI index associated with `processed_bam`", - bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", + bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file", validate_report: "Validation report produced by `picard ValidateSamFile`. 
Validation warnings and errors are logged.", } allowNestedInputs: true @@ -46,7 +47,9 @@ workflow alignment_post { Boolean use_all_cores = false } - call picard.sort as picard_sort { input: bam } + call picard.sort as picard_sort { input: + bam, + } if (cleanse_xenograft) { call samtools.index as pre_xenocp_index { input: @@ -57,14 +60,23 @@ workflow alignment_post { call xenocp_wf.xenocp { input: input_bam = picard_sort.sorted_bam, input_bai = pre_xenocp_index.bam_index, - reference_tar_gz = select_first([contaminant_db, ""]), - aligner = select_first([xenocp_aligner, "undefined"]), + reference_tar_gz = select_first([ + contaminant_db, + "", + ]), + aligner = select_first([ + xenocp_aligner, + "undefined", + ]), skip_duplicate_marking = true, } } if (mark_duplicates) { call picard.mark_duplicates as picard_markdup { input: - bam = select_first([xenocp.bam, picard_sort.sorted_bam]), + bam = select_first([ + xenocp.bam, + picard_sort.sorted_bam, + ]), } } @@ -79,9 +91,13 @@ workflow alignment_post { use_all_cores, } File aligned_bam_index = samtools_index.bam_index - call picard.validate_bam { input: bam = aligned_bam } + call picard.validate_bam { input: + bam = aligned_bam, + } - call md5sum.compute_checksum { input: file = aligned_bam } + call md5sum.compute_checksum { input: + file = aligned_bam, + } output { File processed_bam = aligned_bam diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index 409ac0133..573db7c7d 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -27,7 +27,9 @@ workflow bam_to_fastqs { Boolean use_all_cores = false } - call samtools.quickcheck { input: bam } + call samtools.quickcheck { input: + bam, + } call samtools.split after quickcheck { input: bam, @@ -42,11 +44,13 @@ workflow bam_to_fastqs { } if (paired_end) { - scatter (reads in - zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz) - ) { + scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz + )) { call fq.fqlint { input: - read_one_fastq = select_first([reads.left, "undefined"]), + read_one_fastq = select_first([ + reads.left, + "undefined", + ]), read_two_fastq = reads.right, } } @@ -54,14 +58,16 @@ workflow bam_to_fastqs { if (!paired_end) { scatter (fq in bam_to_fastq.single_end_reads_fastq_gz) { call fq.fqlint as se_fqlint { input: - read_one_fastq = select_first([fq, "undefined"]), + read_one_fastq = select_first([ + fq, + "undefined", + ]), } } } output { - Array[File] read1s = ( - if paired_end + Array[File] read1s = (if paired_end then select_all(bam_to_fastq.read_one_fastq_gz) else select_all(bam_to_fastq.single_end_reads_fastq_gz) ) diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index a34585c99..fe6825278 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -1,21 +1,20 @@ ## **WARNING:** this workflow is experimental! Use at your own risk! - version 1.1 import "../../tools/samtools.wdl" workflow samtools_merge { - meta{ + meta { name: "Merge BAMs" description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length" category: "Utility" outputs: { - merged_bam: "The BAM resulting from merging all the input BAMs" + merged_bam: "The BAM resulting from merging all the input BAMs", } allowNestedInputs: true } - parameter_meta{ + parameter_meta { bams: "BAMs to merge into a final BAM" prefix: "Prefix for output BAM." use_all_cores: "Use all cores? 
Recommended for cloud environments." @@ -31,22 +30,21 @@ workflow samtools_merge { Int bam_length = length(bams) - if (bam_length > max_length){ + if (bam_length > max_length) { # Find the number of merges required - scatter (merge_num in range((bam_length / max_length) + 1)){ + scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams - scatter (bam_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (bam_num in range(max_length)) { + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num ) - if (num < bam_length){ + if (num < bam_length) { File bam_list = bams[num] } } } - scatter (list in bam_list){ + scatter (list in bam_list) { call samtools.merge as inner_merge { input: bams = select_all(list), prefix, @@ -65,7 +63,7 @@ workflow samtools_merge { } } - if (bam_length < max_length){ + if (bam_length < max_length) { call samtools.merge as basic_merge { input: bams, prefix, @@ -76,6 +74,9 @@ workflow samtools_merge { } output { - File merged_bam = select_first([final_merge.merged_bam, basic_merge.merged_bam]) + File merged_bam = select_first([ + final_merge.merged_bam, + basic_merge.merged_bam, + ]) } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 4632d5402..03a06cf6b 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -33,21 +33,20 @@ workflow methylation_cohort { Int beta_length = length(unfiltered_normalized_beta) Int pval_length = length(p_values) - if (beta_length > max_length){ - scatter (merge_num in range((beta_length / max_length) + 1)){ + if (beta_length > max_length) { + scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files - scatter (beta_num in range(max_length)){ - Int num = ( - if merge_num > 0 + scatter (beta_num in range(max_length)) { + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num ) - if (num < beta_length){ + if (num < beta_length) { File bam_list = unfiltered_normalized_beta[num] } } } - scatter (iter_index in range(length(bam_list))){ + scatter (iter_index in range(length(bam_list))) { call combine_data as inner_merge { input: files_to_combine = select_all(bam_list[iter_index]), combined_file_name = "~{iter_index}.combined.csv", @@ -59,22 +58,21 @@ workflow methylation_cohort { combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { # If p-values are provided, merge those as well - scatter (merge_num in range((pval_length / max_length) + 1)){ + scatter (merge_num in range((pval_length / max_length) + 1)) { # Get the sublist of p-value files - scatter (pval_num in range(max_length)){ - Int num_p = ( - if merge_num > 0 + scatter (pval_num in range(max_length)) { + Int num_p = (if merge_num > 0 then pval_num + (merge_num * max_length) else pval_num ) - if (num_p < pval_length){ + if (num_p < pval_length) { File pval_list = p_values[num_p] } } } - scatter (iter_index in range(length(pval_list))){ + scatter (iter_index in range(length(pval_list))) { call combine_data as inner_merge_pvals { input: files_to_combine = select_all(pval_list[iter_index]), combined_file_name = "~{iter_index}.pvals.combined.csv", @@ -88,12 +86,12 @@ workflow methylation_cohort { } } - if (beta_length <= max_length){ + if (beta_length <= max_length) { call combine_data as simple_merge { input: files_to_combine = unfiltered_normalized_beta, 
combined_file_name = "combined_beta.csv", } - if (pval_length > 0 && !skip_pvalue_check){ + if (pval_length > 0 && !skip_pvalue_check) { call combine_data as simple_merge_pval { input: files_to_combine = p_values, combined_file_name = "combined_pvals.csv", @@ -101,23 +99,19 @@ workflow methylation_cohort { } } - File? pval_file = ( - if (pval_length > 0 && !skip_pvalue_check) - then select_first( - [ - final_merge_pvals.combined_file, - simple_merge_pval.combined_file, - ]) + File? pval_file = (if (pval_length > 0 && !skip_pvalue_check) + then select_first([ + final_merge_pvals.combined_file, + simple_merge_pval.combined_file, + ]) else None ) call filter_probes { input: - beta_values = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ), + beta_values = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]), p_values = pval_file, num_probes, } @@ -131,12 +125,10 @@ workflow methylation_cohort { } output { - File combined_beta = select_first( - [ - final_merge.combined_file, - simple_merge.combined_file, - ] - ) + File combined_beta = select_first([ + final_merge.combined_file, + simple_merge.combined_file, + ]) File filtered_beta = filter_probes.filtered_beta_values File filtered_probeset = filter_probes.filtered_probes File umap_embedding = generate_umap.umap @@ -149,7 +141,7 @@ task combine_data { meta { description: "Combine data from multiple CSV files by column" outputs: { - combined_file: "Combined CSV file" + combined_file: "Combined CSV file", } } @@ -173,16 +165,19 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(files_to_combine, "GiB") * - if simple_merge then 2 else 1) - + modify_memory_gb - + 2 + Int memory_gb = ceil(size(files_to_combine, "GiB") * if simple_merge + then 2 + else 1 + ) + modify_memory_gb + 2 Int disk_size_gb = ceil(size(files_to_combine, "GiB") * 2) + 2 command <<< python /scripts/methylation/combine.py \ --output-name "~{combined_file_name}" \ - ~{if simple_merge then "--simple-merge" else ""} \ + ~{if simple_merge + then "--simple-merge" + else "" + } \ ~{sep(" ", quote(files_to_combine))} >>> @@ -258,7 +253,7 @@ task generate_umap { meta { description: "Generate UMAP embedding" outputs: { - umap: "UMAP embedding for all samples" + umap: "UMAP embedding for all samples", } } @@ -297,7 +292,7 @@ task plot_umap { meta { description: "Plot UMAP embedding" outputs: { - umap_plot: "UMAP plot for all samples" + umap_plot: "UMAP plot for all samples", } } diff --git a/workflows/methylation/methylation-preprocess.wdl b/workflows/methylation/methylation-preprocess.wdl index 95132cfc8..f85ed8613 100644 --- a/workflows/methylation/methylation-preprocess.wdl +++ b/workflows/methylation/methylation-preprocess.wdl @@ -48,10 +48,8 @@ task process_raw_idats { >>> output { - File beta_swan_norm_unfiltered - = out_base + ".beta_swan_norm_unfiltered.csv" - File beta_swan_norm_unfiltered_genomic - = out_base + ".beta_swan_norm_unfiltered.genomic.csv" + File beta_swan_norm_unfiltered = out_base + ".beta_swan_norm_unfiltered.csv" + File beta_swan_norm_unfiltered_genomic = out_base + ".beta_swan_norm_unfiltered.genomic.csv" File annotation = out_base + ".annotation.csv" File beta_unnorm = out_base + ".beta.csv" File cn_values = out_base + ".cn_values.csv" diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 618d41efb..23be2d06b 100644 --- a/workflows/methylation/methylation-standard.wdl +++ 
b/workflows/methylation/methylation-standard.wdl @@ -32,19 +32,17 @@ workflow methylation { scatter (pair in zip(green_idats, red_idats)) { call preprocess.process_raw_idats { input: - idats = pair + idats = pair, } } call cohort.methylation_cohort { input: - unfiltered_normalized_beta = - process_raw_idats.beta_swan_norm_unfiltered_genomic, + unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, p_values = process_raw_idats.probe_pvalues, } output { - Array[File] beta_swan_norm_unfiltered_genomic = - process_raw_idats.beta_swan_norm_unfiltered_genomic + Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic File combined_beta = methylation_cohort.combined_beta File filtered_beta = methylation_cohort.filtered_beta File filtered_probeset = methylation_cohort.filtered_probeset diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl index 70771d8e8..2e0420fa5 100644 --- a/workflows/qc/markdups-post.wdl +++ b/workflows/qc/markdups-post.wdl @@ -5,7 +5,6 @@ ## whether a read is a duplicate or not. ## But the tasks called below produce different results depending on whether the ## input BAM has been duplicate marked or not. - version 1.1 import "../../tools/mosdepth.wdl" @@ -61,7 +60,7 @@ workflow markdups_post { bam_index = markdups_bam_index, prefix = prefix + "." + "whole_genome", } - scatter(coverage_pair in zip(coverage_beds, coverage_labels)) { + scatter (coverage_pair in zip(coverage_beds, coverage_labels)) { call mosdepth.coverage as regions_coverage { input: bam = markdups_bam, bam_index = markdups_bam_index, @@ -72,8 +71,7 @@ workflow markdups_post { output { File insert_size_metrics = collect_insert_size_metrics.insert_size_metrics - File insert_size_metrics_pdf - = collect_insert_size_metrics.insert_size_metrics_pdf + File insert_size_metrics_pdf = collect_insert_size_metrics.insert_size_metrics_pdf File flagstat_report = flagstat.flagstat_report File mosdepth_global_summary = wg_coverage.summary File mosdepth_global_dist = wg_coverage.global_dist diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 1587b7fda..3762e2883 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -126,8 +126,7 @@ workflow quality_check_standard { File kraken_db File? 
gtf #@ except: LineWidth - File multiqc_config - = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" + File multiqc_config = "https://raw.githubusercontent.com/stjudecloud/workflows/main/workflows/qc/multiqc_config/multiqc_config.yaml" Array[File] extra_multiqc_inputs = [] Array[File] coverage_beds = [] Array[String] coverage_labels = [] @@ -164,20 +163,24 @@ workflow quality_check_standard { coverage_labels, } call flag_filter.validate_flag_filter as kraken_filter_validator { input: - flags = standard_filter + flags = standard_filter, } if (run_comparative_kraken) { - call flag_filter.validate_flag_filter - as comparative_kraken_filter_validator - { input: - flags = comparative_filter + call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: + flags = comparative_filter, } } - call md5sum.compute_checksum after parse_input { input: file = bam } + call md5sum.compute_checksum after parse_input { input: + file = bam, + } - call samtools.quickcheck after parse_input { input: bam } - call util.compression_integrity after parse_input { input: bgzipped_file = bam } + call samtools.quickcheck after parse_input { input: + bam, + } + call util.compression_integrity after parse_input { input: + bgzipped_file = bam, + } if (subsample_n_reads > 0) { call samtools.subsample after quickcheck { input: @@ -188,7 +191,10 @@ workflow quality_check_standard { } if (defined(subsample.sampled_bam)) { call samtools.index as subsample_index { input: - bam = select_first([subsample.sampled_bam, "undefined"]), + bam = select_first([ + subsample.sampled_bam, + "undefined", + ]), use_all_cores, } } @@ -203,8 +209,7 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = ( - if (defined(subsample.sampled_bam)) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix ) @@ -235,7 +240,9 @@ workflow quality_check_standard { outfile_name = post_subsample_prefix + ".readlength.tsv", } call ngsderive.encoding after quickcheck { input: - ngs_files = [post_subsample_bam], + ngs_files = [ + post_subsample_bam, + ], outfile_name = post_subsample_prefix + ".encoding.tsv", num_reads = -1, } @@ -249,9 +256,7 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck - after kraken_filter_validator - { input: + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, bitwise_filter = standard_filter, prefix = post_subsample_prefix, @@ -267,14 +272,24 @@ workflow quality_check_standard { } call fq.fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken after fqlint { input: - read_one_fastq_gz - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix, @@ 
-282,23 +297,29 @@ workflow quality_check_standard { } if (run_fastp) { call fp.fastp after fqlint { input: - read_one_fastq - = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([bam_to_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + bam_to_fastq.read_two_fastq_gz, + "undefined", + ]), output_fastq = false, } } if (run_librarian) { call libraran_tasks.librarian after fqlint { input: - read_one_fastq = select_first([bam_to_fastq.read_one_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + bam_to_fastq.read_one_fastq_gz, + "undefined", + ]), } } if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck - after comparative_kraken_filter_validator - { input: + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { + input: bam = post_subsample_bam, bitwise_filter = comparative_filter, prefix = post_subsample_prefix + ".alt_filtered", @@ -315,16 +336,24 @@ workflow quality_check_standard { use_all_cores, } call fq.fqlint as alt_filtered_fqlint { input: - read_one_fastq - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), } call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: - read_one_fastq_gz - = select_first([alt_filtered_fastq.read_one_fastq_gz, "undefined"]), - read_two_fastq_gz - = select_first([alt_filtered_fastq.read_two_fastq_gz, "undefined"]), + read_one_fastq_gz = select_first([ + alt_filtered_fastq.read_one_fastq_gz, + "undefined", + ]), + read_two_fastq_gz = select_first([ + alt_filtered_fastq.read_two_fastq_gz, + "undefined", + ]), db = kraken_db, store_sequences = store_kraken_sequences, prefix = post_subsample_prefix + ".alt_filtered", @@ -337,8 +366,8 @@ workflow quality_check_standard { bam_index = post_subsample_bam_index, prefix = post_subsample_prefix + ".whole_genome", } - scatter(coverage_pair in zip(coverage_beds, parse_input.labels)) { - call mosdepth.coverage as regions_coverage after quickcheck { input: + scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) { + call mosdepth.coverage as regions_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, coverage_bed = coverage_pair.left, @@ -350,19 +379,31 @@ workflow quality_check_standard { call ngsderive.junction_annotation after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), prefix = post_subsample_prefix, } call ngsderive.strandedness after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, - gene_model = select_first([gtf, "undefined"]), + gene_model = select_first([ + gtf, + "undefined", + ]), outfile_name = post_subsample_prefix + ".strandedness.tsv", } call qualimap.rnaseq as qualimap_rnaseq { input: - bam = select_first([bam_to_fastq.collated_bam, "undefined"]), + bam = select_first([ + bam_to_fastq.collated_bam, + "undefined", + ]), prefix = post_subsample_prefix + ".qualimap_rnaseq_results", - gtf = 
select_first([gtf, "undefined"]), + gtf = select_first([ + gtf, + "undefined", + ]), name_sorted = true, paired_end = true, # matches default but prevents user from overriding } @@ -434,17 +475,27 @@ workflow quality_check_standard { ], regions_coverage.summary, select_all(regions_coverage.region_dist), - select_first([markdups_post.mosdepth_region_summary, []]), - select_first([markdups_post.mosdepth_region_dist, []]), - ( - if (mark_duplicates && optical_distance > 0) - then [markdups.mark_duplicates_metrics] + select_first([ + markdups_post.mosdepth_region_summary, + [], + ]), + select_first([ + markdups_post.mosdepth_region_dist, + [], + ]), + (if (mark_duplicates && optical_distance > 0) + then [ + markdups.mark_duplicates_metrics, + ] else [] ), ])) call multiqc_tasks.multiqc { input: - files = flatten([multiqc_files, extra_multiqc_inputs]), + files = flatten([ + multiqc_files, + extra_multiqc_inputs, + ]), config = multiqc_config, report_name = post_subsample_prefix + ".multiqc", } @@ -483,7 +534,10 @@ workflow quality_check_standard { File? kraken_sequences = kraken.sequences File? comparative_kraken_sequences = comparative_kraken.sequences File? junctions = junction_annotation.junctions - Array[File] intermediate_files = select_first([optional_files, []]) + Array[File] intermediate_files = select_first([ + optional_files, + [], + ]) } } @@ -491,7 +545,7 @@ task parse_input { meta { description: "Parses and validates the `quality_check_standard` workflow's provided inputs" outputs: { - labels: "An array of labels to use on the result coverage files associated with each coverage BED" + labels: "An array of labels to use on the result coverage files associated with each coverage BED", } } @@ -539,8 +593,7 @@ task parse_input { >>> output { - Array[String] labels = ( - if (coverage_beds_len > 0) + Array[String] labels = (if (coverage_beds_len > 0) then read_lines("labels.txt") else [] ) diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl index 3aefef4f1..579385f1c 100644 --- a/workflows/reference/bwa-db-build.wdl +++ b/workflows/reference/bwa-db-build.wdl @@ -40,7 +40,7 @@ workflow bwa_db_build { } output { - File reference_fa = reference_download.downloaded_file - File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz + File reference_fa = reference_download.downloaded_file + File bwa_db_tar_gz = build_bwa_db.bwa_db_tar_gz } } diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl index 3b4dff835..2cf2e4087 100644 --- a/workflows/reference/gatk-reference.wdl +++ b/workflows/reference/gatk-reference.wdl @@ -88,16 +88,28 @@ workflow gatk_reference { if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) { call util.download as dbsnp_index { input: - url = select_first([dbSNP_vcf_index_url, "undefined"]), - outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]), + url = select_first([ + dbSNP_vcf_index_url, + "undefined", + ]), + outfile_name = select_first([ + dbSNP_vcf_index_name, + "undefined", + ]), disk_size_gb = dbSNP_vcf_index_disk_size_gb, } } if (defined(interval_list_url) && defined(interval_list_name)) { call util.download as intervals { input: - url = select_first([interval_list_url, "undefined"]), - outfile_name = select_first([interval_list_name, "undefined"]), + url = select_first([ + interval_list_url, + "undefined", + ]), + outfile_name = select_first([ + interval_list_name, + "undefined", + ]), disk_size_gb = interval_list_disk_size_gb, } } diff --git 
a/workflows/reference/qc-reference.wdl b/workflows/reference/qc-reference.wdl index 2d64b8901..28f67aebf 100644 --- a/workflows/reference/qc-reference.wdl +++ b/workflows/reference/qc-reference.wdl @@ -121,12 +121,12 @@ workflow qc_reference { } } - if ( - (length(kraken_fastas) > 0) - || (length(kraken_fasta_urls) > 0) - || (length(kraken_libraries) > 0) - ) { - call kraken2.download_taxonomy { input: protein } + if ((length(kraken_fastas) > 0) || (length(kraken_fasta_urls) > 0) || (length( + kraken_libraries + ) > 0)) { + call kraken2.download_taxonomy { input: + protein, + } } scatter (lib in kraken_libraries) { @@ -136,7 +136,10 @@ workflow qc_reference { } } - Array[File] custom_fastas = flatten([kraken_fastas, fastas_download.downloaded_file]) + Array[File] custom_fastas = flatten([ + kraken_fastas, + fastas_download.downloaded_file, + ]) if (length(custom_fastas) > 0) { call kraken2.create_library_from_fastas { input: fastas_gz = custom_fastas, @@ -145,9 +148,13 @@ workflow qc_reference { } Array[File] kraken_tarballs = flatten([ - select_all([download_taxonomy.taxonomy]), + select_all([ + download_taxonomy.taxonomy, + ]), download_library.library, - select_all([create_library_from_fastas.custom_library]), + select_all([ + create_library_from_fastas.custom_library, + ]), ]) if (length(kraken_tarballs) > 0) { call kraken2.build_db as kraken_build_db { input: diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl index d3a99fbe2..d2d14b2a1 100644 --- a/workflows/reference/star-db-build.wdl +++ b/workflows/reference/star-db-build.wdl @@ -56,8 +56,8 @@ workflow star_db_build { } output { - File reference_fa = reference_download.downloaded_file - File gtf = gtf_download.downloaded_file - File star_db_tar_gz = build_star_db.star_db + File reference_fa = reference_download.downloaded_file + File gtf = gtf_download.downloaded_file + File star_db_tar_gz = build_star_db.star_db } } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index b5d088317..afad29046 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -144,9 +144,7 @@ workflow rnaseq_core { GC_AG_and_CT_GC_motif: 5, AT_AC_and_GT_AT_motif: 5, } - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -201,13 +199,11 @@ workflow rnaseq_core { } } - Array[File] chosen_r1s = ( - if enable_read_trimming + Array[File] chosen_r1s = (if enable_read_trimming then select_all(trim.read_one_fastq_gz) else read_one_fastqs_gz ) - Array[File] chosen_r2s = ( - if enable_read_trimming + Array[File] chosen_r2s = (if enable_read_trimming then select_all(trim.read_two_fastq_gz) else read_two_fastqs_gz ) @@ -252,8 +248,7 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = ( - if (provided_strandedness != "") + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_mapping[provided_strandedness] else htseq_strandedness_mapping[ngsderive_strandedness.strandedness_string] ) @@ -262,12 +257,11 @@ workflow rnaseq_core { bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam") - + ( - if provided_strandedness == "" - then 
ngsderive_strandedness.strandedness_string - else provided_strandedness - ), + prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness + == "" + then ngsderive_strandedness.strandedness_string + else provided_strandedness + ), pos_sorted = true, } @@ -280,9 +274,13 @@ workflow rnaseq_core { File feature_counts = htseq_count.feature_counts File inferred_strandedness = ngsderive_strandedness.strandedness_file String inferred_strandedness_string = ngsderive_strandedness.strandedness_string - Array[File] fastp_reports = select_all(flatten([fastp.report, trim.report])) - Array[File] fastp_jsons = select_all(flatten( - [fastp.report_json, trim.report_json] - )) + Array[File] fastp_reports = select_all(flatten([ + fastp.report, + trim.report, + ])) + Array[File] fastp_jsons = select_all(flatten([ + fastp.report_json, + trim.report_json, + ])) } } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index dee188b10..29ff3bfc6 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -73,9 +73,7 @@ workflow rnaseq_standard_fastq { Array[File] read_two_fastqs_gz Array[ReadGroup] read_groups File? contaminant_db - String prefix = sub( - basename(read_one_fastqs_gz[0]), - "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", + String prefix = sub(basename(read_one_fastqs_gz[0]), "(([_.][rR](?:ead)?[12])((?:[_.-][^_.-]*?)*?))?\\.(fastq|fq)(\\.gz)?$", "" # Once replacing with capturing groups is supported, replace with group 3 ) String xenocp_aligner = "star" @@ -100,7 +98,7 @@ workflow rnaseq_standard_fastq { } } - if (validate_input){ + if (validate_input) { scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint after parse_input { input: read_one_fastq = reads.left, @@ -123,12 +121,10 @@ workflow rnaseq_standard_fastq { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ - subsample.subsampled_read2, - read_two_fastqs_gz, - ]) - ) + Array[File] selected_read_two_fastqs = select_all(select_first([ + subsample.subsampled_read2, + read_two_fastqs_gz, + ])) call rnaseq_core_wf.rnaseq_core after fqlint { input: read_one_fastqs_gz = selected_read_one_fastqs, diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index c7278a72a..48f70ab95 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -92,7 +92,10 @@ workflow rnaseq_standard { use_all_cores, } } - File selected_bam = select_first([subsample.sampled_bam, bam]) + File selected_bam = select_first([ + subsample.sampled_bam, + bam, + ]) call read_group.get_read_groups after validate_input_bam { input: bam = selected_bam, diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl index 8df2e61a1..ab7a8a445 100644 --- a/workflows/rnaseq/rnaseq-variant-calling.wdl +++ b/workflows/rnaseq/rnaseq-variant-calling.wdl @@ -54,7 +54,7 @@ workflow rnaseq_variant_calling { Int scatter_count = 6 } - if (!bam_is_dup_marked){ + if (!bam_is_dup_marked) { call picard.mark_duplicates { input: bam, create_bam = true, @@ -62,8 +62,14 @@ workflow rnaseq_variant_calling { } call gatk.split_n_cigar_reads { input: - bam = select_first([mark_duplicates.duplicate_marked_bam, bam]), - bam_index = select_first([mark_duplicates.duplicate_marked_bam_index, bam_index]), + bam = select_first([ + mark_duplicates.duplicate_marked_bam, + 
bam, + ]), + bam_index = select_first([ + mark_duplicates.duplicate_marked_bam_index, + bam_index, + ]), fasta, fasta_index, dict, From 2805d4c154e6b2c325efd73667b06e152bc8e78d Mon Sep 17 00:00:00 2001 From: Ari Frantz Date: Fri, 6 Feb 2026 11:23:59 -0500 Subject: [PATCH 33/47] docs: revise best practices and contributing guide (#293) _Describe the problem or feature in addition to a link to the issues._ closes #277 Before submitting this PR, please make sure: - [ ] You have added a few sentences describing the PR here. - [ ] The code passes all CI tests without any errors or warnings. - [ ] You have added tests (when appropriate). - [ ] You have added an entry in any relevant CHANGELOGs (when appropriate). - [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly! - [ ] You have updated the README or other documentation to account for these changes (when appropriate). --------- Co-authored-by: Andrew Thrasher --- CONTRIBUTING.md | 44 ++++++++++++++++++++++ best-practices.md | 93 ++++++++++++++--------------------------------- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 81fc14d24..706a0a221 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,6 +31,50 @@ Our pull request template has an extensive checklist that must be completed prio Note that the maintainers reserve the right to close any submission without review for any reason. +## Expectations for WDL contributions + +We have some opinionated rules and guidelines we use while writing WDL for this repository. These include: + +- See `template/common-parameter-meta.txt` for common description strings. + - If applicable, use the same parameter name and help text as the underlying tool called by the task. +- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations. + - See the various tasks in the template directory for possible ways to allocate resources. + - Contributors can mix and match the available templates, copy and pasting subsections as appropriate. + - A task may contain both statically and dynamically allocated resources. +- Multi-core tasks should *always* follow the conventions laid out in the `use_all_cores_task` example (see `template/task-examples.wdl`). + - This is catering to cloud users, who may be allocated a machine with more cores than are specified by the `ncpu` parameter. + - Note that future versions of WDL will likely cause a change to this convention. + - We plan to deprecate the `ncpu` param in favor of accessing the runtime section directly (`n_cores=~{task.runtime.cpu}`). +- Output file names should *always* be determined with either the `outfile_name` parameter or the `prefix` parameter. + - `outfile_name` should be preferred if no downstream tasks/tools rely on the file name/extension. + - Tasks with multiple outputs should always use the `prefix` convention. +- After the input sorting rules in `sprocket lint` have been applied, follow the below rules for further sorting. + - "sample" files come before "reference" files. + - If present, `use_all_cores` should be the last `Boolean` in its block. + - The `ncpu` parameter comes before inputs that allocate memory, which come before inputs that allocate disk space. + - This block of 2-3 inputs should come after all other inputs. +- If a task uses multiple cores or is multithreaded, then at least 2 cpu should be specified. 
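+  - As a rough sketch of the multi-core conventions above (the task name, tool invocation, and image pin are hypothetical placeholders; `template/task-examples.wdl` remains the canonical reference):
+
+    ```wdl
+    version 1.1
+
+    task multi_core_sketch {
+        input {
+            File bam
+            Boolean use_all_cores = false
+            Int ncpu = 2  # multithreaded, so request at least 2 CPUs
+        }
+
+        command <<<
+            set -euo pipefail
+
+            n_cores=~{ncpu}
+            if ~{use_all_cores}; then
+                # Cloud backends may allocate more cores than `ncpu` requests.
+                n_cores=$(nproc)
+            fi
+
+            samtools sort --threads "$n_cores" -o sorted.bam "~{bam}"
+        >>>
+
+        output {
+            File sorted_bam = "sorted.bam"
+        }
+
+        runtime {
+            cpu: ncpu
+            docker: "quay.io/biocontainers/samtools:1.19--h50ea8bc_0"  # illustrative pin
+            maxRetries: 1
+        }
+    }
+    ```
+
+    Only the `n_cores` plumbing is the convention being illustrated here; the `samtools sort` payload is stand-in work.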
+- Use the `as` keyword sparingly; only in the case of increased readability or to avoid name collisions.
+  - Prefer using `as` in the import block rather than at the task/workflow call level.
+  - When using `as` to rename an invalid URI, attempt to make as few changes to the filename as possible (i.e. try not to abbreviate).
+  - To disambiguate a task or workflow file from its contents, you can respectively add the `_tasks` or `_wf` suffix in the import section.
+- Whenever possible, prefer a Docker image maintained by an external source (such as BioContainers) rather than creating your own image.
+- When adding a `Dockerfile` to this repository, follow the below conventions:
+  - Create a directory under the `docker/` directory and choose an appropriate name (likely shared with the underlying tool). The `Dockerfile` should be nested under this new directory. Then create a `package.json` alongside the `Dockerfile`. The `package.json` file is required to contain two JSON fields (`name` and `version`). It can optionally contain a `revision` field.
+  - Docker images should be versioned according to the following convention:
+    - The `version` should be shared with whatever underlying tool is being used
+    - If no specific tool is named (e.g. the `util` image), default to SemVer. Ignore the next 3 bullet points.
+    - The revision should start with zero (`0`).
+    - If the Docker image gets updated, *without* updating the base tool's version, increment the number by one.
+    - If the Docker image gets updated, *including* updating the base tool's version, revert back to zero.
+- Any tasks which are deprecated should have a `deprecated: true` key in their `meta` section.
+  - Never include a `deprecated: false` key in any production tasks. All tasks are assumed to not be deprecated unless otherwise noted.
+  - In addition, there should be a `warning` key which starts with the text `**[DEPRECATED]**`.
+    - No other text or explanation is required after the above text, but it can be added for further context.
+  - These two conventions allow for a task's deprecated status to be communicated in multiple ways, ensuring no user misses the notice.
+  - Deprecated tasks should be placed at the end of their file.
+- While WDL allows embedded scripts in the `command` block sections, this repository requires scripts (e.g. R, Python) to be separate and placed in the `scripts` folder. The relevant Docker image build for your task should then include the script during the build so the task can access it. This separation of concerns improves the developer experience by improving syntax highlighting in the WDL document and enabling linting and formatting checks for the scripting languages.

## FAQs

### Can I use Artificial Intelligence (AI)?

diff --git a/best-practices.md b/best-practices.md
index 5a9df8072..38d6cb005 100644
--- a/best-practices.md
+++ b/best-practices.md
@@ -1,69 +1,30 @@
 # WDL Best Practices

-All rules below should be followed by contributors to this repo. Contributors should also follow the rules enforced by [Sprocket](https://sprocket.bio/). Pull Requests which do not conform to these specifications will be asked to change.
-
-## Rules
-
-- All WDL should be written in v1.1+
 - All tasks with multiple commands (including any pipes (`|`)) should have `set -euo pipefail` before any other commands.
-- All tasks should run in a persistently versioned container
-  - This ensures reproducibility across time and environments
-- See `template/common-parameter-meta.txt` for common description strings.
- - If applicable, use the same parameter name, help string, and parameter ordering as the underlying tool called by the task -- Check all assumptions made about workflow inputs before beginning long running executions - - Common examples of assumptions that should be checked: valid `String` choice, mutually exclusive parameters, missing optional file for selected parameters, filename extensions - - This can commonly be handled by a `parse_input` task (defined in the same file as the workflow in question) - - When possible, avoid passing in entire files to the `parse_input` task. Coerce files to `Boolean`s or `String`s to avoid unnecessary disk space usage -- Tasks with string parameters for which a limited number of choices are valid, must be documented following the template in `string_choices_task` (see `template/task-examples.wdl`) - - they should also fail quickly with an informative error message if an invalid input is provided - - In most cases, just passing the parameter to the underlying tool should produce a satisfactory error, but this must be checked for each tool - - While redundant, it is still best practice to validate these strings in the `parse_input` task of any workflow which calls the task - - This ensures the workflow will fail as fast as possible to save users time and resources -- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations - - see the various tasks in the template directory for possible ways to allocate resources - - Contributors can mix and match the available templates, copy and pasting subsections as appropriate - - It is allowed to have one resource allocated dynamically, and another allocated statically in the same task. -- multi-core tasks should *always* follow the conventions laid out in the `use_all_cores_task` example (see `template/task-examples.wdl`) - - this is catering to cloud users, who may be allocated a machine with more cores than are specified by the `ncpu` parameter - - Note that future versions of WDL will likely cause a change to this convention. - - We plan to deprecate the `ncpu` param in favor of accessing the runtime section directly (`n_cores=~{task.runtime.cpu}`) -- Tasks which assume a file and any accessory files (e.g. a BAM and a BAI) have specific extensions and/or are in the same directory should *always* follow the conventions laid out in the `localize_files_task` example (see `template/task-examples.wdl`) - - This is to accommodate as many backends as possible -- output file names should *always* be determined with either the `outfile_name` parameter or the `prefix` parameter. - - `outfile_name` should be preferred if no downstream tasks/tools rely on the file name/extension - - tasks with multiple outputs should always use the `prefix` convention -- After the input sorting rules in `sprocket lint` have been applied, follow the below rules for further sorting. - - "sample" files come before "reference" files - - If present, `use_all_cores` should be the last `Boolean` in its block - - the `ncpu` parameter comes before inputs that allocate memory, which come before inputs that allocate disk space - - This block of 2-3 inputs should come after all other inputs. -- Most tasks should have a default `maxRetries` of 1 - - Certain tasks are prone to intermittent failure (often if an internet connection is involved) and can have a higher default `maxRetries`. -- If a task uses multiple cores or is multithreaded, then at least 2 cpu should be specified. 
-- Use the `as` keyword sparingly; only in the case of increased readability or to avoid name collisions - - Prefer using `as` in the import block rather than at the task/workflow call level - - When using `as` to rename an invalid URI, attempt to make as few changes to the filename as possible (i.e. try not to abbreviate) - - To disambiguate a task or workflow file from it's contents, you can respectively add the `_tasks` or `_wf` suffix in the import section -- the non-empty qualifier (`+`) of arrays and maps should be avoided -- Whenever possible, prefer a Docker image maintained by an external source (such as BioContainers) rather than creating your own image -- When adding a Dockerfile to this repository, follow the below conventions - - Create a directory under the `docker/` directory and choose an appropriate name (likely shared with the underlying tool). The `Dockerfile` should be nested under this new directory. Then create a `package.json` alongside the `Dockerfile`. The `package.json` file is required to contain two JSON fields (`name` and `version`). It can optionally contain a `revision` field. - - Docker images should be versioned according to the following convention - - The `version` should be shared with whatever underlying tool is being used - - If no specific tool is named (e.g. the `util` image), default to SemVer. Ignore the next 3 bullet points. - - The revision should start with zero (`0`) - - If the Docker image gets updated, *without* updating the base tool's version, increment the number by one - - If the Docker image gets updated, *including* updating the base tool's version, revert back to zero -- general purpose tasks can use the `util` image maintained in this repo -- The `description` key in WDL meta sections should be in active voice, beginning the first sentence with a verb - - Each task/workflow is _doing_ something. The first sentence should be a succinct description of what that "something" is. - - The `description` key should be succinct. Generally, one sentence shorter than 140 characters is appropriate. -- If documenting a workflow, task, input, or output and you need to be more verbose than is appropriate in a `description` field, you may include _in addition_ a `help` key with extended prose or an `external_help` key with a URL - - the presence of `help` or `external_help` is _not_ a substitute for a `description` -- Any tasks which are deprecated should have a `deprecated: true` key in their `meta` section - - It is allowed (but redundant and discouraged) to include a `deprecated: false` key in any production tasks. All tasks are assumed to not be deprecated unless otherwise noted. - - In addition, there should be a `warning` key which starts with the text `**[DEPRECATED]**` - - No other text or explanation is required after the above text, but it can be added for further context - - These two rules allow for a task's deprecated status to be communicated in multiple ways, ensuring no user misses the notice - - Deprecated tasks should be placed at the end of their file -- While WDL allows embedded scripts in the `command` block sections, this repository requires scripts (e.g. R, Python) to be separate and placed in the `scripts` folder. The relevant Docker image build for your task should then include the script during the build so the task can access it. This separation of concerns improves the developer experience by improving syntax highlighting in the WDL document and enabling linting and formatting checks for the scripting languages. 
\ No newline at end of file
+  - Tasks without multiple commands or pipes can omit this.
+  - These options will cause common classes of bugs in Bash scripts to fail immediately and loudly, instead of causing silent or subtle bugs in your task behavior.
+- All tasks should run in a persistently versioned container.
+  - e.g. do not use `latest` tags for Docker images.
+  - This helps ensure reproducibility across time and environments.
+- Check all assumptions made about workflow inputs before beginning long-running executions.
+  - Common examples of assumptions that should be checked:
+    - valid `String` choice (for WDL 1.3 and later, an `enum` should be used in place of `String`s with a fixed set of valid options)
+    - mutually exclusive parameters
+    - missing optional file for selected parameters
+    - filename extensions
+  - Use `after` clauses in workflows to ensure that all these assumptions are valid before beginning tasks with heavy computation.
+- If the _contents_ of a `File` are not read or do not need to be localized for a task, try to coerce the `File` variable to a `Boolean` (with `defined()`) or a `String` (with `basename()`) to avoid unnecessary disk space usage and networking.
+- All requirement values are overridable at runtime. However, tasks should have easily configurable memory and disk space allocations.
+  - Often, tasks have a dynamic calculation for resource requirements based on input sizes. Users of a WDL should have an easy way to fine-tune this calculation.
+    - This may mean incorporating an `Int` or `Float` in the inputs of the task that is applied to the dynamic calculation.
+  - For WDL 1.3 and later, WDL authors can change resource requirements between retry attempts. This enables mitigation of errors relating to resource limits, but users may inadvertently disable these mitigations by introducing runtime overrides. WDL authors should expose resource fine-tuning via the input section and incorporate those user values in any dynamic calculations to prevent runtime locking.
+- Tasks which assume a file and any accessory files (e.g. a BAM and a BAI) have specific extensions and/or are in the same directory should *always* create symlinks from the mounted inputs to the work directory of the task.
+  - This is because individual `File` inputs are not guaranteed to be in the same mounted directory.
+  - The `command` may include something like: `ln -s "~{}" "./"`
+- Tasks should `rm` any temporary or intermediate files created in the work directory (including symlinks).
+  - This helps reduce disk bloat from keeping unnecessary files around.
+  - This is especially important for any large or uncompressed files, such as reference FASTAs or databases.
+- Most tasks should have a default `maxRetries` of 1.
+  - This is because many WDL backends are prone to intermittent failures that can be recovered from with a retry.
+  - Certain tasks are especially prone to intermittent failure (often if any networking is involved) and can have a higher default `maxRetries`.
+  - Certain tasks with potentially high compute costs in cloud environments may default to `0`. This should be used in combination with call caching to aid rerunning while minimizing costs.

From c958f35c8f4b9778ab1a0e5282587819ab751338 Mon Sep 17 00:00:00 2001
From: Andrew Thrasher
Date: Fri, 6 Feb 2026 16:15:39 -0500
Subject: [PATCH 34/47] chore: add WDL-specific instructions for copilot (#295)

Adding an initial set of instructions for Copilot specific to WDL and our internal best practices.
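As a concrete reference for reviewers, here is a minimal task sketch consistent with several of the conventions these instructions point to (strict Bash options, symlinked accessory files, a user-tunable dynamic disk allocation, a pinned container, and a default `maxRetries` of 1). It is an illustration only: the task name, tool choice, and container tag are assumptions for this sketch, not excerpts from the repository templates.

```wdl
version 1.1

task idxstats {
    meta {
        description: "Runs `samtools idxstats` on the input BAM (illustrative sketch only)"
    }

    parameter_meta {
        bam: "Input BAM format file to run `samtools idxstats` on"
        bam_index: "BAM index file corresponding to the input BAM"
        modify_disk_size_gb: "Add to or subtract from the dynamic disk space allocation"
    }

    input {
        File bam
        File bam_index
        Int modify_disk_size_gb = 0
    }

    Float bam_size = size(bam, "GiB")
    # dynamic disk allocation that users can fine-tune via `modify_disk_size_gb`
    Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb

    command <<<
        # fail immediately and loudly on common classes of Bash bugs
        set -euo pipefail

        # symlink the BAM and its index into the work directory, since
        # individual `File` inputs are not guaranteed to share a mounted directory
        ln -s "~{bam}" "~{basename(bam)}"
        ln -s "~{bam_index}" "~{basename(bam_index)}"

        samtools idxstats "~{basename(bam)}" > "idxstats.txt"

        # remove the symlinks to reduce disk bloat
        rm "~{basename(bam)}" "~{basename(bam_index)}"
    >>>

    output {
        File idxstats_report = "idxstats.txt"
    }

    runtime {
        memory: "4 GB"
        disks: "~{disk_size_gb} GB"
        # persistently versioned container; never a `latest` tag
        container: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_0"
        maxRetries: 1
    }
}
```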
Before submitting this PR, please make sure:

- [x] You have added a few sentences describing the PR here.
- [ ] The code passes all CI tests without any errors or warnings.
- [ ] You have added tests (when appropriate).
- [ ] You have added an entry in any relevant CHANGELOGs (when appropriate).
- [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly!
- [ ] You have updated the README or other documentation to account for these changes (when appropriate).
---
 .github/instructions/wdl.instructions.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .github/instructions/wdl.instructions.md

diff --git a/.github/instructions/wdl.instructions.md b/.github/instructions/wdl.instructions.md
new file mode 100644
index 000000000..0f3fbef0a
--- /dev/null
+++ b/.github/instructions/wdl.instructions.md
@@ -0,0 +1,13 @@
+---
+applyTo: "**/*.wdl"
+---
+# This file contains instructions for the WDL code style in the workflows directory.
+
+# WDL Development Instructions
+
+- Use the latest version of the WDL spec from https://github.com/openwdl/wdl/
+- Use the CONTRIBUTING.md guide in this repository for general coding style and best practices.
+- Use the best-practices.md guide in this repository for WDL-specific best practices.
+- Prefer the strings in template/common-parameter-meta.txt when writing parameter meta sections.
+- Use the templates in template/task-examples.wdl when writing new tasks.
+- All WDL code should be checked with Sprocket lint and formatted with Sprocket format before committing.

From 4a7fa73dddfe143e2e874853ad37c384866d4741 Mon Sep 17 00:00:00 2001
From: Kevin Benton <1820709+kevin-benton@users.noreply.github.com>
Date: Mon, 9 Feb 2026 12:47:29 -0600
Subject: [PATCH 35/47] chore(Snyk): Security upgrade ubuntu from
 noble-20250925 to noble-20260113 (#288)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

![snyk-top-banner](https://res.cloudinary.com/snyk/image/upload/r-d/scm-platform/snyk-pull-requests/pr-banner-default.svg)

### Snyk has created this PR to fix 2 vulnerabilities in the dockerfile dependencies of this project.

Keeping your Docker base image up-to-date means you’ll benefit from security fixes in the latest version of your chosen image.

#### Snyk changed the following file(s):

- `docker/util/Dockerfile`

We recommend upgrading to `ubuntu:24.04`, as this image has only **8** known vulnerabilities. To do this, merge this pull request, then verify your application still works as expected.

#### Vulnerabilities that will be fixed with an upgrade:

|  | Issue | Score |
| :---: | :--- | :--- |
| ![high severity](https://res.cloudinary.com/snyk/image/upload/w_20,h_20/v1561977819/icon/h.png 'high severity') | Out-of-bounds Write<br/>[SNYK-UBUNTU2404-GNUPG2-14849555](https://snyk.io/vuln/SNYK-UBUNTU2404-GNUPG2-14849555) | **281** |
| ![medium severity](https://res.cloudinary.com/snyk/image/upload/w_20,h_20/v1561977819/icon/m.png 'medium severity') | Directory Traversal<br/>[SNYK-UBUNTU2404-PAM-11936905](https://snyk.io/vuln/SNYK-UBUNTU2404-PAM-11936905) | **231** |

---

> [!IMPORTANT]
>
> - Check the changes in this PR to ensure they won't cause issues with your project.
> - Max score is 1000. Note that the real score may have changed since the PR was raised.
> - This PR was automatically created by Snyk using the credentials of a real user.

---

**Note:** _You are seeing this because you or someone else with access to this repository has authorized Snyk to open fix PRs._

For more information:
🧐 [View latest project report](https://app.snyk.io/org/compbio/project/c80293d7-c0f4-4d94-b297-efe1fc5caf56?utm_source=github&utm_medium=referral&page=fix-pr)
📜 [Customise PR templates](https://docs.snyk.io/scan-using-snyk/pull-requests/snyk-fix-pull-or-merge-requests/customize-pr-templates?utm_source=github&utm_content=fix-pr-template)
🛠 [Adjust project settings](https://app.snyk.io/org/compbio/project/c80293d7-c0f4-4d94-b297-efe1fc5caf56?utm_source=github&utm_medium=referral&page=fix-pr/settings)
📚 [Read about Snyk's upgrade logic](https://docs.snyk.io/scan-with-snyk/snyk-open-source/manage-vulnerabilities/upgrade-package-versions-to-fix-vulnerabilities?utm_source=github&utm_content=fix-pr-template)

---

**Learn how to fix vulnerabilities with free interactive lessons:**

🦉 [Out-of-bounds Write](https://learn.snyk.io/lesson/out-of-bounds-write/?loc=fix-pr)
🦉 [Directory Traversal](https://learn.snyk.io/lesson/directory-traversal/?loc=fix-pr)

---------

Co-authored-by: snyk-bot
Co-authored-by: Copilot <198982749+Copilot@users.noreply.github.com>
Co-authored-by: adthrasher <1165729+adthrasher@users.noreply.github.com>
Co-authored-by: Andrew Thrasher
---
 data_structures/flag_filter.wdl              |  2 +-
 data_structures/read_group.wdl               |  6 +++---
 docker/python-plotting/package.json          |  2 +-
 docker/umap/Dockerfile                       | 12 ++++++------
 docker/umap/package.json                     |  2 +-
 docker/util/Dockerfile                       |  2 +-
 docker/util/package.json                     |  2 +-
 tools/htseq.wdl                              |  2 +-
 tools/md5sum.wdl                             |  2 +-
 tools/util.wdl                               | 14 +++++++-------
 workflows/dnaseq/dnaseq-standard.wdl         |  2 +-
 workflows/methylation/methylation-cohort.wdl |  4 ++--
 workflows/qc/quality-check-standard.wdl      |  2 +-
 workflows/rnaseq/rnaseq-standard.wdl
| 2 +- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 263b7f456..e57326359 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -106,7 +106,7 @@ task validate_string_is_12bit_int { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index df81982d6..b06ba639b 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -138,7 +138,7 @@ task get_read_groups { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -363,7 +363,7 @@ task validate_read_group { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -421,7 +421,7 @@ task inner_read_group_to_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/docker/python-plotting/package.json b/docker/python-plotting/package.json index 9ca1275fa..1fc0df903 100644 --- a/docker/python-plotting/package.json +++ b/docker/python-plotting/package.json @@ -1,4 +1,4 @@ { "name": "python-plotting", - "version": "2.0.7" + "version": "2.0.8" } \ No newline at end of file diff --git a/docker/umap/Dockerfile b/docker/umap/Dockerfile index 9afd846b3..452d55829 100644 --- a/docker/umap/Dockerfile +++ b/docker/umap/Dockerfile @@ -1,10 +1,10 @@ -FROM python:3.13.1-alpine +FROM python:3.14.2-alpine -# Downstream requires llvm15, but that is out-of-date -# llvm15 installs with the -15 suffix, so we need to symlink -RUN apk add --no-cache build-base llvm15-dev bash \ - && ln -s /usr/bin/llvm-config-15 /usr/bin/llvm-config +# Downstream requires llvm20, but that is out-of-date +# llvm20 installs with the -20 suffix, so we need to symlink +RUN apk add --no-cache build-base llvm20-dev bash cmake llvm20 llvm20-libs llvm20-static llvm20-gtest \ + && ln -s /usr/bin/llvm20-config /usr/bin/llvm-config -RUN pip install umap-learn==0.5.7 pandas +RUN pip install umap-learn==0.5.11 pandas COPY --from=scripts --chmod=777 methylation/generate_umap.py /scripts/methylation/generate_umap.py \ No newline at end of file diff --git a/docker/umap/package.json b/docker/umap/package.json index fc6ab1a0d..71cfaf371 100644 --- a/docker/umap/package.json +++ b/docker/umap/package.json @@ -1,5 +1,5 @@ { "name": "umap", "version": "0.5.7", - "revision": "10" + "revision": "11" } \ No newline at end of file diff --git a/docker/util/Dockerfile b/docker/util/Dockerfile index ef7287a87..ed5fbbe42 100644 --- a/docker/util/Dockerfile +++ b/docker/util/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:noble-20250925 +FROM ubuntu:noble-20260113 RUN apt-get update \ && apt-get upgrade -y \ diff --git a/docker/util/package.json b/docker/util/package.json index 252cb57ef..a30115713 100644 --- a/docker/util/package.json +++ b/docker/util/package.json @@ -1,4 +1,4 @@ { "name": "util", - "version": "3.0.1" + "version": "3.0.2" } \ No newline at end of file diff --git a/tools/htseq.wdl b/tools/htseq.wdl index cb8b16014..18ff0e0da 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -195,7 +195,7 @@ task calc_tpm { runtime { memory: "4 GB" disks: "10 GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git 
a/tools/md5sum.wdl b/tools/md5sum.wdl index 1e79260a0..c450d9282 100755 --- a/tools/md5sum.wdl +++ b/tools/md5sum.wdl @@ -34,7 +34,7 @@ task compute_checksum { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/tools/util.wdl b/tools/util.wdl index be141df2c..fd0f35263 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -40,7 +40,7 @@ task download { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -79,7 +79,7 @@ task split_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -127,7 +127,7 @@ task calc_feature_lengths { runtime { memory: "16 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -246,7 +246,7 @@ task unpack_tarball { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -353,7 +353,7 @@ task global_phred_scores { runtime { memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -402,7 +402,7 @@ task check_fastq_and_rg_concordance { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } @@ -462,7 +462,7 @@ task split_fastq { cpu: ncpu memory: "4 GB" disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 194c18cea..b4608c891 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -163,7 +163,7 @@ task parse_input { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 03a06cf6b..cb5f39c8a 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -280,7 +280,7 @@ task generate_umap { } runtime { - container: "ghcr.io/stjudecloud/umap:0.5.7-10" + container: "ghcr.io/stjudecloud/umap:0.5.7-11" memory: "8 GB" cpu: 1 disks: "~{disk_size_gb} GB" @@ -320,7 +320,7 @@ task plot_umap { cpu: 1 memory: "4 GB" disks: "4 GB" - container: "ghcr.io/stjudecloud/python-plotting:2.0.7" + container: "ghcr.io/stjudecloud/python-plotting:2.0.8" maxRetries: 1 } } diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 3762e2883..062216b2d 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -600,7 +600,7 @@ task parse_input { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index 48f70ab95..7db5e205e 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -184,7 +184,7 @@ task parse_input { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.1" + container: "ghcr.io/stjudecloud/util:3.0.2" maxRetries: 1 } } From 
90573e087c5568d3dd3313f7cf60b52c1df2e4b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:22:16 -0500 Subject: [PATCH 36/47] chore(deps): bump urllib3 from 2.5.0 to 2.6.3 (#291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.5.0 to 2.6.3.
Release notes

Sourced from urllib3's releases.

2.6.3

🚀 urllib3 is fundraising for HTTP/2 support

urllib3 is raising ~$40,000 USD to release HTTP/2 support and ensure long-term sustainable maintenance of the project after a sharp decline in financial support. If your company or organization uses Python and would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and thousands of other projects please consider contributing financially to ensure HTTP/2 support is developed sustainably and maintained for the long-haul.

Thank you for your support.

Changes

  • Fixed a security issue where decompression-bomb safeguards of the streaming API were bypassed when HTTP redirects were followed. (CVE-2026-21441 reported by @​D47A, 8.9 High, GHSA-38jv-5279-wg99)
  • Started treating Retry-After times greater than 6 hours as 6 hours by default. (urllib3/urllib3#3743)
  • Fixed urllib3.connection.VerifiedHTTPSConnection on Emscripten. (urllib3/urllib3#3752)

2.6.2

Changes

  • Fixed HTTPResponse.read_chunked() to properly handle leftover data in the decoder's buffer when reading compressed chunked responses. (urllib3/urllib3#3734)

2.6.1

Changes

  • Restore previously removed HTTPResponse.getheaders() and HTTPResponse.getheader() methods. (#3731)

2.6.0

Security

  • Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (CVE-2025-66471 reported by @​Cycloctane, 8.9 High, GHSA-2xpw-w6gg-jr37)
  • Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the Content-Encoding header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (CVE-2025-66418 reported by @​illia-v, 8.9 High, GHSA-gm62-xv2j-4w53)

[!IMPORTANT]

  • If urllib3 is not installed with the optional urllib3[brotli] extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using urllib3[brotli] to install a compatible Brotli package automatically.

... (truncated)

Changelog

Sourced from urllib3's changelog.

2.6.3 (2026-01-07)

  • Fixed a high-severity security issue where decompression-bomb safeguards of the streaming API were bypassed when HTTP redirects were followed. (GHSA-38jv-5279-wg99 <https://github.com/urllib3/urllib3/security/advisories/GHSA-38jv-5279-wg99>__)
  • Started treating Retry-After times greater than 6 hours as 6 hours by default. ([#3743](https://github.com/urllib3/urllib3/issues/3743) <https://github.com/urllib3/urllib3/issues/3743>__)
  • Fixed urllib3.connection.VerifiedHTTPSConnection on Emscripten. ([#3752](https://github.com/urllib3/urllib3/issues/3752) <https://github.com/urllib3/urllib3/issues/3752>__)

2.6.2 (2025-12-11)

  • Fixed HTTPResponse.read_chunked() to properly handle leftover data in the decoder's buffer when reading compressed chunked responses. ([#3734](https://github.com/urllib3/urllib3/issues/3734) <https://github.com/urllib3/urllib3/issues/3734>__)

2.6.1 (2025-12-08)

  • Restore previously removed HTTPResponse.getheaders() and HTTPResponse.getheader() methods. ([#3731](https://github.com/urllib3/urllib3/issues/3731) <https://github.com/urllib3/urllib3/issues/3731>__)

2.6.0 (2025-12-05)

Security

  • Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (GHSA-2xpw-w6gg-jr37 <https://github.com/urllib3/urllib3/security/advisories/GHSA-2xpw-w6gg-jr37>__)
  • Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the Content-Encoding header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (GHSA-gm62-xv2j-4w53 <https://github.com/urllib3/urllib3/security/advisories/GHSA-gm62-xv2j-4w53>__)

.. caution::

  • If urllib3 is not installed with the optional urllib3[brotli] extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using

... (truncated)

Commits
  • 0248277 Release 2.6.3
  • 8864ac4 Merge commit from fork
  • 70cecb2 Fix Scorecard issues related to vulnerable dev dependencies (#3755)
  • 41f249a Move "v2.0 Migration Guide" to the end of the table of contents (#3747)
  • fd4dffd Patch VerifiedHTTPSConnection for Emscripten (#3752)
  • 13f0bfd Handle massive values in Retry-After when calculating time to sleep for (#3743)
  • 8c480bf Bump actions/upload-artifact from 5.0.0 to 6.0.0 (#3748)
  • 4b40616 Bump actions/cache from 4.3.0 to 5.0.1 (#3750)
  • 82b8479 Bump actions/download-artifact from 6.0.0 to 7.0.0 (#3749)
  • 34284cb Mention experimental features in the security policy (#3746)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=urllib3&package-manager=uv&previous-version=2.5.0&new-version=2.6.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/uv.lock b/uv.lock
index 5d61e8384..ba7164f2e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -543,11 +543,11 @@ wheels = [

 [[package]]
 name = "urllib3"
-version = "2.5.0"
+version = "2.6.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" },
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
 ]

 [[package]]

From 79aa181a13360999aead4a82529ff709feac512e Mon Sep 17 00:00:00 2001
From: Ari Frantz
Date: Mon, 16 Feb 2026 09:27:04 -0500
Subject: [PATCH 37/47] feat: gzip picard validate STDOUT (#298)

_Describe the problem or feature in addition to a link to the issues._

The non-summary mode of `validate_bam` can output multiple lines _per read_ in the input BAM, using more disk than the input file. This passes that output through gzip (a simplified sketch of the pattern follows the checklist below).

This also removes the x2 factor on the disk size allocation for this task. We may instead want to keep that x2 factor, but only if we're in full report mode?

Before submitting this PR, please make sure:

- [ ] You have added a few sentences describing the PR here.
- [ ] The code passes all CI tests without any errors or warnings.
- [ ] You have added tests (when appropriate).
- [ ] You have added an entry in any relevant CHANGELOGs (when appropriate).
- [ ] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly!
- [ ] You have updated the README or other documentation to account for these changes (when appropriate).
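Sketched in isolation, the change amounts to the pattern below. This is a simplified, hypothetical `validate_sketch` task, not the real `validate_bam`: the actual argument list and the exit-code handling (`|| rc=$?` and the error grep) are elided, and only the conditional gzip of the report is shown.

```wdl
version 1.1

task validate_sketch {
    input {
        File bam
        String outfile_name = basename(bam, ".bam") + ".validate.txt"
        Boolean summary_mode = false
    }

    # compress the full report, which can otherwise exceed the input BAM in size;
    # summary-mode reports are small and are left uncompressed
    String outfile = if summary_mode then outfile_name else outfile_name + ".gz"

    command <<<
        picard ValidateSamFile \
            -I "~{bam}" \
            ~{if summary_mode then "--MODE SUMMARY" else ""} \
            ~{if !summary_mode then "| gzip" else ""} \
            > "~{outfile}"
    >>>

    output {
        File validate_report = outfile
    }

    runtime {
        container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0"
        maxRetries: 1
    }
}
```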
--------- Co-authored-by: Andrew Thrasher --- .gitignore | 3 ++- tools/CHANGELOG.md | 7 ++++++ tools/picard.wdl | 63 ++++++++++++++++++++++------------------------ 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 2bab65b34..ac07b2a2c 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ pytest/ **/womtool* *.jar runs/ +out/ miniwdl_call_cache/ miniwdl_singularity_cache/ _LAST @@ -30,7 +31,7 @@ _LAST results*/ output*/ -# Ignore JSONs in the root (probably being used for input to sprocket run) +# Ignore JSONs in the root (probably being used for input) /*.json # Ignore `sprocket doc` dir diff --git a/tools/CHANGELOG.md b/tools/CHANGELOG.md index c57ce6972..12563cd4a 100644 --- a/tools/CHANGELOG.md +++ b/tools/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/). +## 2026 February + +### Changed + +- gzip `picard.validate_bam` output when not in `summary_mode` [#298](https://github.com/stjudecloud/workflows/pull/298) +- Lowered default disk allocation for `picard.validate_bam` [#298](https://github.com/stjudecloud/workflows/pull/298) + ## 2026 January ### Changed diff --git a/tools/picard.wdl b/tools/picard.wdl index 3c8f7d4ce..22570d083 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -1,4 +1,5 @@ ## [Homepage](https://broadinstitute.github.io/picard/) + version 1.1 task mark_duplicates { @@ -83,10 +84,13 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 12), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam - then ceil((bam_size * 2) + 10) - else ceil(bam_size + 10) - ) + modify_disk_size_gb) + Int disk_size_gb = ( + ( + if create_bam + then ceil((bam_size * 2) + 10) + else ceil(bam_size + 10) + ) + modify_disk_size_gb + ) Int java_heap_size = ceil(memory_gb * 0.9) @@ -96,17 +100,13 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I "~{bam}" \ --METRICS_FILE "~{prefix}.metrics.txt" \ - -O "~{if create_bam - then prefix + ".bam" - else "/dev/null" - }" \ + -O "~{if create_bam then prefix + ".bam" else "/dev/null"}" \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY "~{validation_stringency}" \ --DUPLICATE_SCORING_STRATEGY "~{duplicate_scoring_strategy}" \ - --READ_NAME_REGEX '~{if (optical_distance > 0) - then read_name_regex - else "null" + --READ_NAME_REGEX '~{ + if (optical_distance > 0) then read_name_regex else "null" }' \ --TAGGING_POLICY "~{tagging_policy}" \ --CLEAR_DT ~{clear_dt} \ @@ -194,15 +194,15 @@ task validate_bam { Int modify_disk_size_gb = 0 } - String mode_arg = if (summary_mode) - then "--MODE SUMMARY" - else "" - String stringency_arg = (if (index_validation_stringency_less_exhaustive) + String outfile = if summary_mode then outfile_name else outfile_name + ".gz" + String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" + String stringency_arg = ( + if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "" ) Float bam_size = size(bam, "GiB") - Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< @@ -218,7 +218,8 @@ task validate_bam { --VALIDATION_STRINGENCY "~{validation_stringency}" \ ~{sep(" ", prefix("--IGNORE ", squote(ignore_list)))} \ --MAX_OUTPUT ~{max_errors} \ - > "~{outfile_name}" \ 
+ ~{if !summary_mode then "| gzip" else ""} \ + > "~{outfile}" \ || rc=$? # rc = 0 = success @@ -236,16 +237,16 @@ task validate_bam { fi if ! ~{succeed_on_errors} \ - && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile_name}")" -gt 0 ] + && [ "$(grep -Ec "$GREP_PATTERN" "~{outfile}")" -gt 0 ] then >&2 echo "Problems detected by Picard ValidateSamFile" - >&2 grep -E "$GREP_PATTERN" "~{outfile_name}" + >&2 grep -E "$GREP_PATTERN" "~{outfile}" exit $rc fi >>> output { - File validate_report = outfile_name + File validate_report = outfile } runtime { @@ -419,10 +420,8 @@ task merge_sam_files { File merged_bam_md5 = outfile_name + ".md5" } - runtime { - cpu: if threading - then 2 - else 1 + runtime{ + cpu: if threading then 2 else 1 memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -507,7 +506,7 @@ task collect_wgs_metrics { wgs_metrics: { description: "Output report of `picard CollectWgsMetrics`", external_help: "https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics", - }, + } } } @@ -854,7 +853,8 @@ task bam_to_fastq { picard -Xmx~{java_heap_size}g SamToFastq INPUT="~{bam}" \ FASTQ="~{prefix}.R1.fastq" \ - ~{(if paired + ~{( + if paired then "SECOND_END_FASTQ='" + prefix + ".R2.fastq'" else "" )} \ @@ -862,10 +862,7 @@ task bam_to_fastq { VALIDATION_STRINGENCY=SILENT gzip "~{prefix}.R1.fastq" \ - ~{if paired - then "'" + prefix + ".R2.fastq'" - else "" - } + ~{if paired then "'" + prefix + ".R2.fastq'" else ""} >>> output { @@ -873,7 +870,7 @@ task bam_to_fastq { File? read_two_fastq_gz = "~{prefix}.R2.fastq.gz" } - runtime { + runtime{ memory: "~{memory_gb} GB" disks: "~{disk_size_gb} GB" container: "quay.io/biocontainers/picard:3.1.1--hdfd78af_0" @@ -937,7 +934,7 @@ task scatter_interval_list { } } - parameter_meta { + parameter_meta { interval_list: "Input interval list to split" scatter_count: "Number of interval lists to create" subdivision_mode: { @@ -1004,7 +1001,7 @@ task create_sequence_dictionary { description: "Creates a sequence dictionary for the input FASTA file using Picard" external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/13832748622491-CreateSequenceDictionary-Picard-" outputs: { - dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`.", + dictionary: "Sequence dictionary produced by `picard CreateSequenceDictionary`." } } From 74e0dbf20f7ff4c80b89f3cd9ac9d16dcfa877d7 Mon Sep 17 00:00:00 2001 From: Andrew Thrasher Date: Mon, 16 Feb 2026 16:39:38 -0500 Subject: [PATCH 38/47] feat: methylation filtering (#283) Add filtering of sex chromosomes to the UMAP generation. Also generate a list of probes that have SNPs. Before submitting this PR, please make sure: - [x] You have added a few sentences describing the PR here. - [x] The code passes all CI tests without any errors or warnings. - [ ] You have added tests (when appropriate). - [x] You have added an entry in any relevant CHANGELOGs (when appropriate). - [x] If you have made any changes to the `scripts/` or `docker/` directories, please ensure any image versions have been incremented accordingly! - [ ] You have updated the README or other documentation to account for these changes (when appropriate). 
--------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Ari Frantz Co-authored-by: Jobin Sunny <38107318+jsunny23@users.noreply.github.com> --- .github/workflows/docker-build.yaml | 41 ------ data_structures/flag_filter.wdl | 2 +- data_structures/read_group.wdl | 6 +- developer_scripts/run_sprocket_or_miniwdl.sh | 3 +- docker/minfi/Dockerfile | 3 +- docker/minfi/package.json | 2 +- docker/pandas/package.json | 2 +- docker/python-plotting/package.json | 2 +- docker/util/package.json | 2 +- scripts/CHANGELOG.md | 11 ++ scripts/methylation/filter.py | 18 ++- scripts/methylation/list-sex-probes.R | 15 ++ scripts/methylation/methylation-preprocess.R | 82 ++++++++--- tools/htseq.wdl | 2 +- tools/md5sum.wdl | 2 +- tools/util.wdl | 14 +- workflows/dnaseq/dnaseq-standard.wdl | 2 +- workflows/methylation/CHANGELOG.md | 11 +- workflows/methylation/methylation-cohort.wdl | 21 ++- .../methylation/methylation-preprocess.wdl | 41 +++++- .../methylation/methylation-standard.wdl | 136 ++++++++++++++++++ workflows/qc/quality-check-standard.wdl | 2 +- workflows/rnaseq/rnaseq-standard.wdl | 2 +- 23 files changed, 331 insertions(+), 91 deletions(-) create mode 100644 scripts/methylation/list-sex-probes.R diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 641134af4..d88d9170b 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -70,47 +70,6 @@ jobs: tags: ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:local cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:buildcache cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:buildcache,mode=max - - name: container diff - run: | - docker pull ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }} || true - if [ -z "$(docker images -q ${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }})" ]; then - echo "Prior image not found in registry, skipping container-diff" - exit 0 - fi - curl -LO https://storage.googleapis.com/container-diff/latest/container-diff-linux-amd64 && \ - chmod +x container-diff-linux-amd64 && \ - ./container-diff-linux-amd64 diff --json daemon://${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:${{ env.TAG_TO_CHECK }} daemon://${{ env.REGISTRY }}/${{ github.repository_owner }}/${{ env.TOOL }}:local --type=history --type=file --type=size | tee comparison.json - # check overall size result - changed=0 - size_diff=$(jq '.[] | select(.DiffType == "Size") | if .Diff[0].Size1 == .Diff[0].Size2 then 1 else 0 end' comparison.json) - if [ $size_diff -eq 0 ] - then - echo "Size of the image has changed" - changed=1 - fi - # check file diff result - additions=$(jq '.[] | select(.DiffType == "File") | if .Diff.Adds != null then .Diff.Adds[].Name else 0 end' comparison.json) - deletions=$(jq '.[] | select(.DiffType == "File") | if .Diff.Dels != null then .Diff.Dels[].Name else 0 end' comparison.json) - modifications=$(jq '.[] | select(.DiffType == "File") | if .Diff.Mods != null then .Diff.Mods[].Name else 0 end' comparison.json) - if [ "$additions" != "0" ] - then - echo "Files have been added" - echo $additions - changed=1 - fi - if [ "$deletions" != "0" ] - then - echo "Files have been deleted" - echo $deletions - changed=1 - fi - if [ "$modifications" != "0" ] - then - echo "Files have been modified" - echo $modifications - changed=1 - fi - 
exit $changed - name: Run Snyk to check Docker image for vulnerabilities # Snyk can be used to break the build when it detects vulnerabilities. # In this case we want to upload the issues to GitHub Code Scanning diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index e57326359..7a27ea213 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -106,7 +106,7 @@ task validate_string_is_12bit_int { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index b06ba639b..460ddea33 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -138,7 +138,7 @@ task get_read_groups { runtime { disks: "~{disk_size_gb} GB" - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } @@ -363,7 +363,7 @@ task validate_read_group { >>> runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } @@ -421,7 +421,7 @@ task inner_read_group_to_string { } runtime { - container: "ghcr.io/stjudecloud/util:3.0.2" + container: "ghcr.io/stjudecloud/util:3.0.3" maxRetries: 1 } } diff --git a/developer_scripts/run_sprocket_or_miniwdl.sh b/developer_scripts/run_sprocket_or_miniwdl.sh index 57869e6fa..4bccf4104 100755 --- a/developer_scripts/run_sprocket_or_miniwdl.sh +++ b/developer_scripts/run_sprocket_or_miniwdl.sh @@ -66,7 +66,8 @@ if [ "$runner" != "miniwdl" ]; then else entrypoint=$wf fi - sprocket run --output output --overwrite -e "$entrypoint" "$wdl" ${input_file:+"$input_file"} "$@" + sprocket run --output-dir output --target "$entrypoint" "$wdl" ${input_file:+"$input_file"} "$@" + cp output/runs/*/_latest/outputs.json output/outputs.json else if [[ $input_file ]]; then input_dir=$(dirname "$input_file") diff --git a/docker/minfi/Dockerfile b/docker/minfi/Dockerfile index 8c979be56..a338a8859 100644 --- a/docker/minfi/Dockerfile +++ b/docker/minfi/Dockerfile @@ -8,4 +8,5 @@ RUN R --no-save <