# gatk.wdl
# WDL task definitions that wrap GATK command-line tools.
# Generate Base Quality Score Recalibration (BQSR) model
#
# Runs `BaseRecalibrator` from the GATK jar on `input_bam`, restricted to the
# intervals in `sequence_group_interval`, and writes the recalibration table
# to `recalibration_report_filename`.
#
# Inputs:
#   preCommand                     optional shell snippet run before GATK
#   gatk_jar                       path handed to `java -jar`
#   input_bam, input_bam_index     declared as String, so the paths are used
#                                  verbatim; the index is not referenced in the
#                                  command (presumably it only needs to exist
#                                  next to the BAM - confirm with callers)
#   recalibration_report_filename  path of the report to write (-O)
#   sequence_group_interval        one or more interval files, each passed as -L
#   known_indels_sites_VCFs        one or more VCFs, each passed as --known-sites
#   known_indels_sites_indices     not referenced in the command; presumably
#                                  declared so the indices are staged with
#                                  the VCFs - confirm
#   ref_fasta                      reference passed as -R; ref_dict and
#                                  ref_fasta_index are not referenced in the
#                                  command
#   memory                         JVM heap size in GB, rounded up (default 4.0)
#   memoryMultiplier               factor applied to the heap size to obtain the
#                                  runtime memory request (default 1.5)
#
# Outputs:
#   recalibration_report           the file written to
#                                  `recalibration_report_filename`
task BaseRecalibrator {
    String? preCommand
    String gatk_jar
    String input_bam
    String input_bam_index
    String recalibration_report_filename
    Array[File]+ sequence_group_interval
    Array[File]+ known_indels_sites_VCFs
    Array[File]+ known_indels_sites_indices
    File ref_dict
    File ref_fasta
    File ref_fasta_index

    Float? memory
    Float? memoryMultiplier

    # Heap size in whole GB. It is passed as -Xmx (maximum heap) so the JVM
    # stays below the runtime memory request, which is this value times the
    # multiplier.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xmx${mem}G -jar ${gatk_jar} \
          BaseRecalibrator \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${recalibration_report_filename} \
          --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibration_report = "${recalibration_report_filename}"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Apply Base Quality Score Recalibration (BQSR) model
#
# Runs `ApplyBQSR` from the GATK jar on `input_bam` using the recalibration
# table in `recalibration_report`, restricted to `sequence_group_interval`,
# and writes the recalibrated BAM to `output_bam_path`.
#
# Inputs:
#   preCommand               optional shell snippet run before GATK
#   gatk_jar                 path handed to `java -jar`
#   input_bam                declared as String, so the path is used verbatim
#   output_bam_path          path of the BAM to write (-O)
#   recalibration_report     BQSR table passed as -bqsr
#   sequence_group_interval  intervals, each passed as -L (Strings here, so
#                            they may be interval names or paths)
#   ref_fasta                reference passed as -R; ref_dict and
#                            ref_fasta_index are not referenced in the command
#   compression_level        optional; when set, adds
#                            -Dsamjdk.compression_level=<n> to the JVM options
#   memory                   JVM heap size in GB, rounded up (default 4.0)
#   memoryMultiplier         factor applied to the heap size to obtain the
#                            runtime memory request (default 1.5)
#
# Outputs:
#   recalibrated_bam           the BAM at `output_bam_path`
#   recalibrated_bam_checksum  `output_bam_path` + ".md5", requested through
#                              --create-output-bam-md5
task ApplyBQSR {
    String? preCommand
    String gatk_jar
    String input_bam
    String output_bam_path
    File recalibration_report
    Array[String] sequence_group_interval
    File ref_dict
    File ref_fasta
    File ref_fasta_index
    Int? compression_level

    Float? memory
    Float? memoryMultiplier

    # Heap size in whole GB. It is passed as -Xmx (maximum heap) so the JVM
    # stays below the runtime memory request, which is this value times the
    # multiplier.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${gatk_jar} \
          ApplyBQSR \
          --create-output-bam-md5 \
          --add-output-sam-program-record \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${output_bam_path} \
          -bqsr ${recalibration_report} \
          --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibrated_bam = "${output_bam_path}"
        File recalibrated_bam_checksum = "${output_bam_path}.md5"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Combine multiple recalibration tables from scattered BaseRecalibrator runs
#
# Runs `GatherBQSRReports` from the GATK jar over every file in
# `input_bqsr_reports` and writes the merged table to `output_report_filepath`.
#
# Inputs:
#   preCommand              optional shell snippet run before GATK
#   gatk_jar                path handed to `java -jar`
#   input_bqsr_reports      report files, each passed as -I
#   output_report_filepath  path of the merged report to write (-O)
#   memory                  JVM heap size in GB, rounded up (default 4.0)
#   memoryMultiplier        factor applied to the heap size to obtain the
#                           runtime memory request (default 1.5)
#
# Outputs:
#   output_bqsr_report      the file written to `output_report_filepath`
task GatherBqsrReports {
    String? preCommand
    String gatk_jar
    Array[File] input_bqsr_reports
    String output_report_filepath

    Float? memory
    Float? memoryMultiplier

    # Heap size in whole GB. It is passed as -Xmx (maximum heap) so the JVM
    # stays below the runtime memory request, which is this value times the
    # multiplier.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xmx${mem}G -jar ${gatk_jar} \
        GatherBQSRReports \
        -I ${sep=' -I ' input_bqsr_reports} \
        -O ${output_report_filepath}
    }

    output {
        File output_bqsr_report = "${output_report_filepath}"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Call variants on a single sample with HaplotypeCaller to produce a GVCF
#
# Runs `HaplotypeCaller` from the GATK jar in GVCF mode (-ERC GVCF) over the
# given BAMs and intervals, writing `<gvcf_basename>.vcf.gz`.
#
# Inputs:
#   preCommand         optional shell snippet run before GATK
#   input_bams         one or more BAMs, each passed as -I
#   input_bams_index   not referenced in the command; presumably declared so
#                      the indices are staged with the BAMs - confirm
#   interval_list      one or more interval files, each passed as -L
#   gvcf_basename      output path without the ".vcf.gz" suffix
#   ref_fasta          reference passed as -R; ref_dict and ref_fasta_index
#                      are not referenced in the command
#   contamination      value for -contamination; 0 is used when unset
#   compression_level  optional; when set, adds
#                      -Dsamjdk.compression_level=<n> to the JVM options
#   gatk_jar           path handed to `java -jar`
#   memory             JVM maximum heap in GB, rounded up (default 4.0)
#   memoryMultiplier   factor applied to the heap size to obtain the runtime
#                      memory request (default 1.5)
#
# Outputs:
#   output_gvcf        `<gvcf_basename>.vcf.gz`
#   output_gvcf_index  `<gvcf_basename>.vcf.gz.tbi`, expected to sit next to
#                      the GVCF once the command finishes
task HaplotypeCallerGvcf {
    String? preCommand
    Array[File]+ input_bams
    Array[File]+ input_bams_index
    Array[File]+ interval_list
    String gvcf_basename
    File ref_dict
    File ref_fasta
    File ref_fasta_index
    Float? contamination
    Int? compression_level
    String gatk_jar

    Float? memory
    Float? memoryMultiplier

    # Maximum heap in whole GB; the runtime request below adds headroom on top.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${gatk_jar} \
          HaplotypeCaller \
          -R ${ref_fasta} \
          -O ${gvcf_basename}.vcf.gz \
          -I ${sep=" -I " input_bams} \
          -L ${sep=' -L ' interval_list} \
          -contamination ${default=0 contamination} \
          -ERC GVCF
    }

    output {
        File output_gvcf = "${gvcf_basename}.vcf.gz"
        File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Genotype a GVCF with GATK GenotypeGVCFs
#
# Runs `GenotypeGVCFs` from the GATK jar on `gvcf_files`, restricted to
# `intervals`, and writes `<output_basename>.vcf.gz`.
#
# Inputs:
#   preCommand         optional shell snippet run before GATK
#   gvcf_files         a single GVCF file (despite the plural name), passed
#                      as -V
#   gvcf_file_indexes  not referenced in the command; presumably declared so
#                      the index is staged with the GVCF - confirm
#   intervals          one or more interval files, each passed as -L
#   output_basename    output path without the ".vcf.gz" suffix
#   gatk_jar           path handed to `java -jar`
#   ref_fasta          reference passed as -R; ref_fasta_index and ref_dict
#                      are not referenced in the command
#   dbsnp_vcf          passed as -D; dbsnp_vcf_index is not referenced in the
#                      command
#   compression_level  optional; when set, adds
#                      -Dsamjdk.compression_level=<n> to the JVM options
#   memory             JVM maximum heap in GB, rounded up (default 4.0)
#   memoryMultiplier   factor applied to the heap size to obtain the runtime
#                      memory request (default 1.5)
#
# Outputs:
#   output_vcf         `<output_basename>.vcf.gz`
#   output_vcf_index   `<output_basename>.vcf.gz.tbi`, expected to sit next to
#                      the VCF once the command finishes
task GenotypeGVCFs {
    String? preCommand
    File gvcf_files
    File gvcf_file_indexes
    Array[File]+ intervals

    String output_basename
    String gatk_jar

    File ref_fasta
    File ref_fasta_index
    File ref_dict

    File dbsnp_vcf
    File dbsnp_vcf_index

    Int? compression_level
    Float? memory
    Float? memoryMultiplier

    # Maximum heap in whole GB; the runtime request below adds headroom on top.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${gatk_jar} \
         GenotypeGVCFs \
         -R ${ref_fasta} \
         -O ${output_basename + ".vcf.gz"} \
         -D ${dbsnp_vcf} \
         -G StandardAnnotation \
         --only-output-calls-starting-in-intervals \
         -new-qual \
         -V ${gvcf_files} \
         -L ${sep=' -L ' intervals}
    }

    output {
        File output_vcf = output_basename + ".vcf.gz"
        File output_vcf_index = output_basename + ".vcf.gz.tbi"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Merge several GVCFs into one with GATK CombineGVCFs
#
# When more than one GVCF is supplied, runs `CombineGVCFs` from the GATK jar
# over all of them, restricted to `intervals`. When exactly one is supplied,
# GATK is not run: the GVCF and its index are symlinked to the output names,
# and `intervals` is not applied.
#
# Inputs:
#   preCommand         optional shell snippet run before GATK
#   gvcf_files         one or more GVCFs, each passed as -V
#   gvcf_file_indexes  indices of `gvcf_files`; the first one is linked as the
#                      output index in the single-file case (assumes the array
#                      is in the same order as `gvcf_files` - confirm with
#                      callers)
#   intervals          one or more interval files, each passed as -L
#   output_basename    output path without the ".vcf.gz" suffix
#   gatk_jar           path handed to `java -jar`
#   ref_fasta          reference passed as -R; ref_fasta_index and ref_dict
#                      are not referenced in the command
#   compression_level  optional; when set, adds
#                      -Dsamjdk.compression_level=<n> to the JVM options
#   memory             JVM maximum heap in GB, rounded up (default 4.0)
#   memoryMultiplier   factor applied to the heap size to obtain the runtime
#                      memory request (default 1.5)
#
# Outputs:
#   output_gvcf        `<output_basename>.vcf.gz`
#   output_gvcf_index  `<output_basename>.vcf.gz.tbi`
task CombineGVCFs {
    String? preCommand
    Array[File]+ gvcf_files
    Array[File]+ gvcf_file_indexes
    Array[File]+ intervals

    String output_basename
    String gatk_jar

    File ref_fasta
    File ref_fasta_index
    File ref_dict

    Int? compression_level
    Float? memory
    Float? memoryMultiplier

    # Maximum heap in whole GB; the runtime request below adds headroom on top.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}

        if [ ${length(gvcf_files)} -gt 1 ]; then
            java ${"-Dsamjdk.compression_level=" + compression_level} \
            -Xmx${mem}G -jar ${gatk_jar} \
             CombineGVCFs \
             -R ${ref_fasta} \
             -O ${output_basename + ".vcf.gz"} \
             -V ${sep=' -V ' gvcf_files} \
             -L ${sep=' -L ' intervals}
        else
            ln -sf ${gvcf_files[0]} ${output_basename + ".vcf.gz"}
            ln -sf ${gvcf_file_indexes[0]} ${output_basename + ".vcf.gz.tbi"}
        fi
    }

    output {
        File output_gvcf = output_basename + ".vcf.gz"
        File output_gvcf_index = output_basename + ".vcf.gz.tbi"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Split reads that contain N CIGAR operations with GATK SplitNCigarReads
#
# Runs `SplitNCigarReads` from the GATK jar on `input_bam`, restricted to
# `intervals`, and writes the result to `output_bam`.
#
# Inputs:
#   preCommand        optional shell snippet run before GATK
#   input_bam         BAM passed as -I
#   input_bam_index   not referenced in the command; presumably declared so
#                     the index is staged with the BAM - confirm
#   ref_fasta         reference passed as -R; ref_fasta_index and ref_dict are
#                     not referenced in the command
#   output_bam        path of the BAM to write (-O)
#   gatk_jar          path handed to `java -jar`
#   intervals         one or more interval files, each passed as -L
#   memory            JVM heap size in GB, rounded up (default 4.0)
#   memoryMultiplier  factor applied to the heap size to obtain the runtime
#                     memory request (default 1.5)
#
# Outputs:
#   bam               the file at `output_bam`
#   bam_index         `output_bam` with a trailing ".bam" replaced by ".bai"
task SplitNCigarReads {
    String? preCommand

    File input_bam
    File input_bam_index
    File ref_fasta
    File ref_fasta_index
    File ref_dict
    String output_bam
    String gatk_jar
    Array[File]+ intervals

    Float? memory
    Float? memoryMultiplier

    # Heap size in whole GB. It is passed as -Xmx (maximum heap) so the JVM
    # stays below the runtime memory request, which is this value times the
    # multiplier.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        java -Xmx${mem}G -jar ${gatk_jar} \
        SplitNCigarReads \
        -I ${input_bam} \
        -R ${ref_fasta} \
        -O ${output_bam} \
        -L ${sep=' -L ' intervals}
    }

    output {
        File bam = output_bam
        File bam_index = sub(output_bam, "\\.bam$", ".bai")
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}