# gatk.wdl - WDL task definitions wrapping GATK command-line tools.
# Generate Base Quality Score Recalibration (BQSR) model
#
# Runs the GATK BaseRecalibrator tool on `input_bam`, restricted to the
# intervals in `sequence_group_interval`, and writes the recalibration table
# to `recalibration_report_filename`.
task BaseRecalibrator {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    String gatk_jar
    # NOTE(review): the BAM and its index are declared as String, not File;
    # `input_bam_index` is never referenced in the command. Presumably the
    # paths are expected to be directly reachable by the job - confirm.
    String input_bam
    String input_bam_index
    String recalibration_report_filename
    # Each entry is passed as its own -L argument.
    Array[File]+ sequence_group_interval
    # Each VCF is passed as its own --known-sites argument; the indices are
    # declared as inputs but not referenced in the command.
    Array[File]+ known_indels_sites_VCFs
    Array[File]+ known_indels_sites_indices
    # Only ref_fasta appears on the command line; the dict and index are
    # declared as inputs but not referenced in the command.
    File ref_dict
    File ref_fasta
    File ref_fasta_index

    # `memory` feeds the JVM heap flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatk_jar} \
          BaseRecalibrator \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${recalibration_report_filename} \
          --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibration_report = "${recalibration_report_filename}"
    }

    runtime {
        # NOTE(review): -Xms sets the initial heap only, while several sibling
        # tasks use -Xmx; the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Apply Base Quality Score Recalibration (BQSR) model
#
# Runs the GATK ApplyBQSR tool on `input_bam` using `recalibration_report`,
# restricted to `sequence_group_interval`, and writes the result to
# `output_bam_path` together with an `.md5` checksum file.
task ApplyBQSR {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    String gatk_jar
    # NOTE(review): declared as String rather than File - presumably the path
    # is expected to be directly reachable by the job; confirm.
    String input_bam
    String output_bam_path
    File recalibration_report
    # Each entry is passed as its own -L argument.
    Array[String] sequence_group_interval
    # Only ref_fasta appears on the command line; the dict and index are
    # declared as inputs but not referenced in the command.
    File ref_dict
    File ref_fasta
    File ref_fasta_index
    # When set, emitted as -Dsamjdk.compression_level=<value>; when unset the
    # whole placeholder is omitted from the command.
    Int? compression_level

    # `memory` feeds the JVM heap flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xms${mem}G -jar ${gatk_jar} \
          ApplyBQSR \
          --create-output-bam-md5 \
          --add-output-sam-program-record \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${output_bam_path} \
          -bqsr ${recalibration_report} \
          --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibrated_bam = "${output_bam_path}"
        # Expected at <output_bam_path>.md5; the command passes
        # --create-output-bam-md5.
        File recalibrated_bam_checksum = "${output_bam_path}.md5"
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Combine multiple recalibration tables from scattered BaseRecalibrator runs
#
# Runs the GATK GatherBQSRReports tool with one -I argument per input report
# and writes the merged report to `output_report_filepath`.
task GatherBqsrReports {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    String gatk_jar
    # NOTE(review): not declared with `+`, so an empty array is accepted and
    # would yield a bare `-I` on the command line - confirm callers never
    # pass an empty list.
    Array[File] input_bqsr_reports
    String output_report_filepath

    # `memory` feeds the JVM heap flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatk_jar} \
        GatherBQSRReports \
        -I ${sep=' -I ' input_bqsr_reports} \
        -O ${output_report_filepath}
    }

    output {
        File output_bqsr_report = "${output_report_filepath}"
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Call variants on a single sample with HaplotypeCaller to produce a GVCF
#
# Runs the GATK HaplotypeCaller tool in `-ERC GVCF` mode over all `inputBams`,
# restricted to `intervalList`, and writes the result to `gvcfPath`.
task HaplotypeCallerGvcf {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    # Each BAM is passed as its own -I argument; the indices are declared as
    # inputs but not referenced in the command.
    Array[File]+ inputBams
    Array[File]+ inputBamsIndex
    # Each entry is passed as its own -L argument.
    Array[File]+ intervalList
    String gvcfPath
    # Only refFasta appears on the command line; the dict and index are
    # declared as inputs but not referenced in the command.
    File refDict
    File refFasta
    File refFastaIndex
    # Passed to -contamination; 0 is used when unset.
    Float? contamination
    # When set, emitted as -Dsamjdk.compression_level=<value>; when unset the
    # whole placeholder is omitted from the command.
    Int? compressionLevel
    String gatkJar

    # `memory` feeds the JVM -Xmx flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compressionLevel} \
        -Xmx${mem}G -jar ${gatkJar} \
          HaplotypeCaller \
          -R ${refFasta} \
          -O ${gvcfPath} \
          -I ${sep=" -I " inputBams} \
          -L ${sep=' -L ' intervalList} \
          -contamination ${default=0 contamination} \
          -ERC GVCF
    }

    output {
        File outputGVCF = gvcfPath
        # The index is expected next to the GVCF with a ".tbi" suffix.
        File outputGVCFindex = gvcfPath + ".tbi"
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Runs the GATK GenotypeGVCFs tool on a single GVCF (`gvcfFiles`), restricted
# to `intervals`, using `dbsnpVCF` for the -D argument, and writes the result
# to `outputPath`.
task GenotypeGVCFs {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    # Despite the plural names these are single files; only gvcfFiles is
    # referenced in the command (as -V).
    File gvcfFiles
    File gvcfFileIndexes
    # Each entry is passed as its own -L argument.
    Array[File]+ intervals
    String outputPath
    String gatkJar
    # Only refFasta appears on the command line; the index and dict are
    # declared as inputs but not referenced in the command.
    File refFasta
    File refFastaIndex
    File refDict
    # dbsnpVCF is passed via -D; its index is declared but not referenced.
    File dbsnpVCF
    File dbsnpVCFindex
    # When set, emitted as -Dsamjdk.compression_level=<value>; when unset the
    # whole placeholder is omitted from the command.
    Int? compressionLevel

    # `memory` feeds the JVM -Xmx flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compressionLevel} \
        -Xmx${mem}G -jar ${gatkJar} \
         GenotypeGVCFs \
         -R ${refFasta} \
         -O ${outputPath} \
         -D ${dbsnpVCF} \
         -G StandardAnnotation \
         --only-output-calls-starting-in-intervals \
         -new-qual \
         -V ${gvcfFiles} \
         -L ${sep=' -L ' intervals}
    }

    output {
        File outputVCF = outputPath
        # The index is expected next to the VCF with a ".tbi" suffix.
        File outputVCFindex = outputPath + ".tbi"
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}
# Merges several GVCFs into one with the GATK CombineGVCFs tool, restricted to
# `intervals`. When only one GVCF is supplied the tool is skipped and the
# input (and its index) are symlinked to `outputPath` / `outputPath`.tbi.
task CombineGVCFs {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    # Each GVCF is passed as its own -V argument. The indices are only
    # referenced in the single-input symlink branch.
    Array[File]+ gvcfFiles
    Array[File]+ gvcfFileIndexes
    # Each entry is passed as its own -L argument.
    Array[File]+ intervals

    String outputPath
    String gatkJar
    # Only refFasta appears on the command line; the index and dict are
    # declared as inputs but not referenced in the command.
    File refFasta
    File refFastaIndex
    File refDict
    # When set, emitted as -Dsamjdk.compression_level=<value>; when unset the
    # whole placeholder is omitted from the command.
    Int? compressionLevel

    # `memory` feeds the JVM -Xmx flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}

        if [ ${length(gvcfFiles)} -gt 1 ]; then
            java ${"-Dsamjdk.compression_level=" + compressionLevel} \
            -Xmx${mem}G -jar ${gatkJar} \
             CombineGVCFs \
             -R ${refFasta} \
             -O ${outputPath} \
             -V ${sep=' -V ' gvcfFiles} \
             -L ${sep=' -L ' intervals}
        else
            ln -sf ${select_first(gvcfFiles)} ${outputPath}
            ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
        fi
    }

    output {
        File outputGVCF = outputPath
        # The index is expected next to the GVCF with a ".tbi" suffix.
        File outputGVCFindex = outputPath + ".tbi"
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Runs the GATK SplitNCigarReads tool on `inputBam`, restricted to
# `intervals`, and writes the result to `outputBam`.
task SplitNCigarReads {
    # Optional shell snippet that is executed before the java call.
    String? preCommand

    # inputBam is passed via -I; its index is declared but not referenced in
    # the command.
    File inputBam
    File inputBamIndex
    # Only refFasta appears on the command line; the index and dict are
    # declared as inputs but not referenced in the command.
    File refFasta
    File refFastaIndex
    File refDict
    String outputBam
    String gatkJar
    # Each entry is passed as its own -L argument.
    Array[File]+ intervals

    # `memory` feeds the JVM heap flag (suffix G); defaults to 4.0.
    Float? memory
    # Factor applied to `mem` for the runtime memory request; defaults to 3.0.
    Float? memoryMultiplier

    # Heap size rounded up to a whole number.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatkJar} \
        SplitNCigarReads \
        -I ${inputBam} \
        -R ${refFasta} \
        -O ${outputBam} \
        -L ${sep=' -L ' intervals}
    }

    output {
        File bam = outputBam
        # Index path is outputBam with a trailing ".bam" replaced by ".bai".
        # If outputBam does not end in ".bam" the path is left unchanged.
        File bam_index = sub(outputBam, "\\.bam$", ".bai")
    }

    runtime {
        # NOTE(review): the unit of this integer is backend-defined - confirm.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}