diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3b4ec9ac40da235c2b2e4e6a682442a6dd5d4832..372071eedba04dd24461a81b4806096474a348ad 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,3 +2,4 @@ - [ ] Pull request details were added to CHANGELOG.md. - [ ] Documentation was updated (if required). - [ ] `parameter_meta` was added/updated (if required). +- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..63b538fce9790456e089e63ddda41859a8ba9f06 --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 785661119a9f3d1ca173a9c92a64a52bbf930328..0000000000000000000000000000000000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Continuous integration - -on: - pull_request: - paths_ignore: - - "docs/**" - -defaults: - run: - # This is needed for miniconda, see: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: bash -l {0} - -jobs: - lint: - runs-on: ubuntu-latest - name: Womtool validate and submodule up to date. - steps: - - uses: actions/checkout@v2.3.4 - with: - submodules: recursive - - name: install miniconda - uses: conda-incubator/setup-miniconda@v2.0.1 - with: - channels: conda-forge,bioconda,defaults - # Conda-incubator uses 'test' environment by default. - - name: install requirements - run: conda install -n test cromwell miniwdl wdl-aid - - name: run linting - run: bash scripts/biowdl_lint.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000000000000000000000000000000..7ef19e58f4bc9c209c3cfffa3ea51359c9afe069 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + # Use an always upload cache to prevent solving conda environment again and again on failing linting. + uses: pat-s/always-upload-cache@v2.1.5 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. 
+ # Activation is therefore done separately. + + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate is very slow, so this saves a lot of time. + run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '\.wdl$'); do + womtool validate $WDL_FILE + done - name: run miniwdl check + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' + - name: Check copyright headers + run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '\.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done - name: Check parameter_meta for inputs + run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '\.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0db94722449604660b6d39e98957fd6380da93..986582ddcd7fef050f497a352b5af3f581f8a13e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. ++ Added a script to strip the UMI from the read name and add it as + a BAM tag on each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Added fgbio.AnnotateBamWithUmis. ++ Added picard.UmiAwareMarkDuplicatesWithMateCigar. + Added a task for SnpEff. + Adjusted runtime settings for sambamba Markdup. + Added a task for sambamba Flagstat. @@ -28,7 +36,7 @@ version 5.1.0-dev + Sage + VirusInterpreter + Added a task for VirusBreakend. -+ Added a task for GridssAnnotateVcfRepeatmasker. ++ Added a task for GridssAnnotateVcfRepeatmasker. + Bumped GRIDSS version to 2.12.2. + Adjusted GRIDSS runtime settings. + Added optional inputs to GRIDSS: @@ -147,7 +155,7 @@ version 4.0.0 + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive.
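For context on the first two new changelog entries: the AnnotateSvTypes task added to gridss.wdl below is meant to run on the VCF produced by the GRIDSS task and writes an SVTYPE-annotated copy. A minimal sketch of how a calling workflow might use it is shown here; the workflow name and the idea of passing the GRIDSS VCF in as plain workflow inputs are illustrative only, while the task name, its inputs and its outputs come from this change set.

version 1.0

import "gridss.wdl" as gridss

# Hypothetical example workflow; not part of this change set.
workflow AnnotateGridssSvTypesExample {
    input {
        File gridssVcf       # VCF produced by the GRIDSS task
        File gridssVcfIndex  # matching tabix index
    }

    call gridss.AnnotateSvTypes as annotateSvTypes {
        input:
            gridssVcf = gridssVcf,
            gridssVcfIndex = gridssVcfIndex,
            outputPath = "./gridss.svtyped.vcf.bgz"
    }

    output {
        File svTypedVcf = annotateSvTypes.vcf
        File? svTypedVcfIndex = annotateSvTypes.vcfIndex
    }
}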
diff --git a/bcftools.wdl b/bcftools.wdl index 88d97cd09465fee1384edc6b498504bbd2adf917..2bf1c7323dad2b7c45ba1b91380e4f2396d80730 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -186,8 +186,8 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "5G" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index 1cb170b70b5dc19321ea6094a31aa53418e6aab8..373de6280552d55006d1468af5c67915fd9ca787 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,6 +94,7 @@ task Mem { outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} usePostalt: {description: "Whether to use the postalt script from bwa kit."} + useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} diff --git a/fgbio.wdl b/fgbio.wdl new file mode 100644 index 0000000000000000000000000000000000000000..d50906d33ef5cfd62ad004561039379c074d15ab --- /dev/null +++ b/fgbio.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateBamWithUmis { + input { + File inputBam + File inputUmi + String outputPath + + String memory = "120G" + Int timeMinutes = 360 + String javaXmx="100G" + String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + fgbio -Xmx~{javaXmx} \ + AnnotateBamWithUmis \ + -i ~{inputBam} \ + -f ~{inputUmi} \ + -o ~{outputPath} + } + + output { + File outputBam = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file.", category: "required"} + inputUmi: {description: "The input fastq file with UMIs.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "UMI-annotated output BAM file."} + } +} diff --git a/gridss.wdl b/gridss.wdl index d3d251a5b2a811d38db2776784e2a41a60f1578c..35e41d21d6cc067384c5f73e99544ffb982d1574 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,6 +79,74 @@ task AnnotateInsertedSequence { } } +task AnnotateSvTypes { + input { + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.svtyped.vcf.bgz" + + String memory = "32G" + String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" + Int timeMinutes = 240 + } + + String effectiveOutputPath = sub(outputPath, "\\.bgz", "") + String index = if effectiveOutputPath != outputPath then "T" else "F" + + + # Based on https://github.com/PapenfussLab/gridss/issues/74 + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + R --vanilla << "EOF" + library(VariantAnnotation) + library(StructuralVariantAnnotation) + + vcf_path <- "~{gridssVcf}" + out_path <- "~{effectiveOutputPath}" + + # Simple SV type classifier + simpleEventType <- function(gr) { + return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal + ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", + ifelse(strand(gr) == strand(partner(gr)), "INV", + ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL", + "DUP"))))) + } + + header <- scanVcfHeader(vcf_path) + vcf <- readVcf(vcf_path, seqinfo(header)) + gr <- breakpointRanges(vcf) + svtype <- simpleEventType(gr) + info(vcf[gr$sourceId])$SVTYPE <- svtype + # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) + geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) + writeVcf(vcf, out_path, index=~{index}) + EOF + >>> + + output { + File vcf = outputPath + File? 
vcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"} + gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam @@ -116,6 +184,12 @@ task GRIDSS { ~{normalBam} \ ~{tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + + # For some reason the VCF index is sometimes missing + if [ ! -e ~{outputPrefix}.vcf.gz.tbi ] + then + tabix ~{outputPrefix}.vcf.gz + fi } output { diff --git a/macs2.wdl b/macs2.wdl index eb71ac1dd8ba5fe7e165068d97ecfb870d1befa6..2afe3bbee058473155d9b472145bd1fb7306f6a5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -28,6 +28,7 @@ task PeakCalling { Array[File] controlBamsIndex String outDir = "macs2" String sampleName + String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours String memory = "8G" @@ -41,6 +42,7 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ + -f ~{format} \ ~{true='--nomodel' false='' nomodel} } @@ -65,6 +67,6 @@ task PeakCalling { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"} } } diff --git a/picard.wdl b/picard.wdl index 9a935045bd9804ce37f7773b5f8db480fa3563b2..3d835829f4c54c3a20ee162cf8d115cea3d0b3ac 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -517,7 +517,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -561,13 +561,15 @@ task GatherBamFiles { String outputBamPath Boolean createMd5File = false - Int? compressionLevel + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -577,7 +579,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - ~{"COMPRESSION_LEVEL=" + compressionLevel} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } @@ -600,7 +604,9 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -619,10 +625,14 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -630,6 +640,10 @@ task GatherVcfs { mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + CREATE_INDEX=true \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} } @@ -654,6 +668,10 @@ task GatherVcfs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + # outputs outputVcf: {description: "Multiple VCF files gathered into one file."} } @@ -665,14 +683,11 @@ task MarkDuplicates { Array[File]+ inputBams String outputBamPath String metricsPath - Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). - # Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel - # deflater is updated! - Boolean useJdkDeflater = true + + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. 
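For illustration, turning optical duplicate detection off from a calling workflow would look roughly like the sketch below. The workflow name, call alias and file paths are hypothetical; read_name_regex is the task input that is forwarded to READ_NAME_REGEX, and passing the literal string "null" disables optical duplicate detection as described above.

version 1.0

import "picard.wdl" as picard

# Hypothetical wrapper; workflow outputs omitted in this sketch.
workflow MarkDuplicatesNoOpticalExample {
    input {
        File sortedBam
    }

    call picard.MarkDuplicates as markdup {
        input:
            inputBams = [sortedBam],
            outputBamPath = "markdup.bam",
            metricsPath = "markdup.metrics",
            read_name_regex = "null"
    }
}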
@@ -686,7 +701,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -702,6 +717,8 @@ task MarkDuplicates { OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ @@ -709,8 +726,6 @@ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ - CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ - USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } output { @@ -732,9 +747,9 @@ outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} - createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} @@ -756,16 +771,20 @@ Array[File]+ inputVCFsIndexes String outputVcfPath Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. + Boolean useJdkInflater = false # Better results for compression level 1 (much smaller). # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! - Boolean useJdkDeflater = true + # Second NOTE: No it did not change. Only the fastest algorithm with + # worse compression is wrapped in the intel GKL. Instead of using + # one of the slightly slower but better compressing alternatives from ISA-L. + # (Which are also faster than zlib.) + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -821,7 +840,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes.
String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" File? noneFile } @@ -882,7 +901,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -917,13 +936,15 @@ task SortSam { Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -936,6 +957,8 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} @@ -960,7 +983,9 @@ task SortSam { sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} - compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -981,7 +1006,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1031,7 +1056,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1068,3 +1093,78 @@ task RenameSample { renamedVcf: {description: "New VCF with renamed sample."} } } + +task UmiAwareMarkDuplicatesWithMateCigar { + input { + Array[File] inputBams + String outputPath + String outputPathMetrics = outputPath + ".metrics" + String outputPathUmiMetrics = outputPath + ".umi-metrics" + Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk. + String? assumeSortOrder + String tempdir = "temp" + Boolean removeDuplicates = true + String umiTagName = "RX" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "8G" + String memory = "9G" + Int timeMinutes = 360 + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" ~{tempdir} + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + UmiAwareMarkDuplicatesWithMateCigar \ + INPUT=~{sep=' INPUT=' inputBams} \ + O=~{outputPath} \ + M=~{outputPathMetrics} \ + UMI_TAG_NAME=~{umiTagName} \ + UMI_METRICS_FILE=~{outputPathUmiMetrics} \ + TMP_DIR=~{tempdir} \ + REMOVE_DUPLICATES=~{removeDuplicates} \ + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ + CREATE_INDEX=true \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + ~{"ASSUME_SORT_ORDER=" + assumeSortOrder} + } + + output { + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") + File outputMetrics = outputPathMetrics + File outputUmiMetrics = outputPathUmiMetrics + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} + outputPath: {description: "The location the output BAM file should be written to.", category: "required"} + outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} + outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} + removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} + assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} + tempdir: {description: "Temporary directory.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records 
stored in RAM before spilling to disk.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} \ No newline at end of file diff --git a/star.wdl b/star.wdl index aa1fd6084096a631758561df393498fa87a485a1..6a123c86dcad02c36a4891884b5674598e006db6 100644 --- a/star.wdl +++ b/star.wdl @@ -78,7 +78,7 @@ task GenomeGenerate { parameter_meta { # inputs - genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} diff --git a/umi.wdl b/umi.wdl new file mode 100644 index 0000000000000000000000000000000000000000..0dc5c55e786014e7a59af4d1bfc91fdd225ef807 --- /dev/null +++ b/umi.wdl @@ -0,0 +1,105 @@ +version 1.0 + +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task BamReadNameToUmiTag { + + # This task processes a bam file with reads that have been extracted with + # umi-tools extract. 
The UMI is extracted from the read name again and put + # in the bam file again with umiTag (default RX) + input { + File inputBam + String outputPath = "output.bam" + String umiTag = "RX" + + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" + } + + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + + command <<< + python <<CODE + import pysam + import sys + import os + + from typing import Tuple + + def split_umi_from_name(name) -> Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + id = id_and_rest[0] + # If there was no whitespace id_and_rest will have length 1 + other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi + + def annotate_umis(in_file, out_file, bam_tag="RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + os.makedirs(os.path.dirname(out_file), exist_ok=True) + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. + encoded_bam_tag = bam_tag.encode('ascii') + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. + # Value type has to be a string though, otherwise pysam crashes. + segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z") + out_bam.write(segment) + + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + CODE + >>> + + output { + File outputBam = outputPath + File outputBamIndex = bamIndexPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} + } +}
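To tie the UMI-related additions together: the task above moves the UMI from the read name into a BAM tag (RX by default), which the new picard.UmiAwareMarkDuplicatesWithMateCigar task can then use for deduplication. A minimal sketch of a calling workflow is given below; the workflow name, call aliases and output paths are illustrative only, while the task names, inputs and outputs are taken from this change set.

version 1.0

import "picard.wdl" as picard
import "umi.wdl" as umi

# Hypothetical example workflow; not part of this change set.
workflow UmiDeduplicationExample {
    input {
        File bam  # coordinate-sorted BAM whose read names carry the UMI (umi-tools extract style)
    }

    # Move the UMI from the read name into the RX tag.
    call umi.BamReadNameToUmiTag as tagUmis {
        input:
            inputBam = bam,
            outputPath = "umi-tagged.bam"
    }

    # Mark duplicates using the RX tag written by the previous task.
    call picard.UmiAwareMarkDuplicatesWithMateCigar as dedup {
        input:
            inputBams = [tagUmis.outputBam],
            outputPath = "deduplicated.bam"
    }

    output {
        File deduplicatedBam = dedup.outputBam
        File deduplicatedBamIndex = dedup.outputBamIndex
        File duplicationMetrics = dedup.outputMetrics
        File umiMetrics = dedup.outputUmiMetrics
    }
}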