# Generate Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator {
    String? preCommand
    String gatk_jar
    String input_bam
    String input_bam_index
    String recalibration_report_filename
    Array[File]+ sequence_group_interval
    Array[File]+ known_indels_sites_VCFs
    Array[File]+ known_indels_sites_indices
    File ref_dict
    File ref_fasta
    File ref_fasta_index

    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatk_jar} \
          BaseRecalibrator \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${recalibration_report_filename} \
          --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibration_report = "${recalibration_report_filename}"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Apply Base Quality Score Recalibration (BQSR) model
task ApplyBQSR {
    String? preCommand
    String gatk_jar
    String input_bam
    String output_bam_path
    File recalibration_report
    Array[String] sequence_group_interval
    File ref_dict
    File ref_fasta
    File ref_fasta_index
    Int? compression_level

    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xms${mem}G -jar ${gatk_jar} \
          ApplyBQSR \
          --create-output-bam-md5 \
          --add-output-sam-program-record \
          -R ${ref_fasta} \
          -I ${input_bam} \
          --use-original-qualities \
          -O ${output_bam_path} \
          -bqsr ${recalibration_report} \
          --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
          -L ${sep=" -L " sequence_group_interval}
    }

    output {
        File recalibrated_bam = "${output_bam_path}"
        File recalibrated_bam_checksum = "${output_bam_path}.md5"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task GatherBqsrReports {
    String? preCommand
    String gatk_jar
    Array[File] input_bqsr_reports
    String output_report_filepath

    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatk_jar} \
        GatherBQSRReports \
        -I ${sep=' -I ' input_bqsr_reports} \
        -O ${output_report_filepath}
    }

    output {
        File output_bqsr_report = "${output_report_filepath}"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
    String? preCommand
    Array[File]+ input_bams
    Array[File]+ input_bams_index
    Array[File]+ interval_list
    String gvcf_basename
    File ref_dict
    File ref_fasta
    File ref_fasta_index
    Float? contamination
    Int? compression_level
    String gatk_jar

    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${gatk_jar} \
          HaplotypeCaller \
          -R ${ref_fasta} \
          -O ${gvcf_basename}.vcf.gz \
          -I ${sep=" -I " input_bams} \
          -L ${sep=' -L ' interval_list} \
          -contamination ${default=0 contamination} \
          -ERC GVCF
    }

    output {
        File output_gvcf = "${gvcf_basename}.vcf.gz"
        File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

task GenotypeGVCFs {
    String? preCommand
    File gvcf_files
    File gvcf_file_indexes
    Array[File]+ intervals

    String output_basename

    String gatk_jar

    File ref_fasta
    File ref_fasta_index
    File ref_dict

    File dbsnp_vcf
    File dbsnp_vcf_index

    Int? compression_level
    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}

        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${gatk_jar} \
         GenotypeGVCFs \
         -R ${ref_fasta} \
         -O ${output_basename + ".vcf.gz"} \
         -D ${dbsnp_vcf} \
         -G StandardAnnotation \
         --only-output-calls-starting-in-intervals \
         -new-qual \
         -V ${gvcf_files} \
         -L ${sep=' -L ' intervals}
    }

    output {
        File output_vcf = output_basename + ".vcf.gz"
        File output_vcf_index = output_basename + ".vcf.gz.tbi"
    }

    runtime{
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

task CombineGVCFs {
    String? preCommand
    Array[File]+ gvcf_files
    Array[File]+ gvcf_file_indexes
    Array[File]+ intervals

    String output_basename

    String gatk_jar

    File ref_fasta
    File ref_fasta_index
    File ref_dict

    Int? compression_level
    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}

        if [ ${length(gvcf_files)} -gt 1 ]; then
            java ${"-Dsamjdk.compression_level=" + compression_level} \
            -Xmx${mem}G -jar ${gatk_jar} \
             CombineGVCFs \
             -R ${ref_fasta} \
             -O ${output_basename + ".vcf.gz"} \
             -V ${sep=' -V ' gvcf_files} \
             -L ${sep=' -L ' intervals}
        else
            ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"}
            ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"}
        fi
    }

    output {
        File output_gvcf = output_basename + ".vcf.gz"
        File output_gvcf_index = output_basename + ".vcf.gz.tbi"
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}

task SplitNCigarReads {
    String? preCommand

    File input_bam
    File input_bam_index
    File ref_fasta
    File ref_fasta_index
    File ref_dict
    String output_bam
    String gatk_jar
    Array[File]+ intervals

    Float? memory
    Float? memoryMultiplier

    Int mem = ceil(select_first([memory, 4.0]))
    command {
        set -e -o pipefail
        ${preCommand}
        java -Xms${mem}G -jar ${gatk_jar} \
        SplitNCigarReads \
        -I ${input_bam} \
        -R ${ref_fasta} \
        -O ${output_bam} \
        -L ${sep=' -L ' intervals}
    }

    output {
        File bam = output_bam
        File bam_index = sub(output_bam, "\\.bam$", ".bai")
    }

    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 1.5]))
    }
}