Skip to content
Snippets Groups Projects
picard.wdl 5.41 KiB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
task ScatterIntervalList {
    # Splits `interval_list` into shards with Picard IntervalListTools and
    # reports both the shard files and how many of them were produced.
    #
    # Inputs:
    #   preCommand       - optional shell snippet executed before Picard.
    #   interval_list    - interval list to split.
    #   scatter_count    - value passed to SCATTER_COUNT.
    #   picard_jar       - path to the Picard jar.
    #   memory           - JVM heap in gigabytes (rounded up); defaults to 4.
    #   memoryMultiplier - factor applied to the heap size for the runtime
    #                      memory request; defaults to 3.0.
    String? preCommand
    File interval_list
    Int scatter_count
    String picard_jar

    Float? memory
    Float? memoryMultiplier

    # Heap size handed to the JVM via -Xmx, in whole gigabytes.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        mkdir scatter_list
        # Picard's own stdout is redirected to stderr so that the only thing on
        # stdout is the shard count that the output section reads with read_int.
        java -Xmx${mem}G -jar ${picard_jar} \
          IntervalListTools \
          SCATTER_COUNT=${scatter_count} \
          SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
          UNIQUE=true \
          SORT=true \
          INPUT=${interval_list} \
          OUTPUT=scatter_list 1>&2
        # Count the files matched by the glob in the output section
        # (scatter_list/*/*.interval_list). `tr` strips the padding some
        # `wc` implementations add.
        find scatter_list -mindepth 2 -maxdepth 2 -name '*.interval_list' | wc -l | tr -d ' '
    }

    output {
        Array[File] out = glob("scatter_list/*/*.interval_list")
        Int interval_count = read_int(stdout())
    }

    runtime {
        # Requested memory is the heap size scaled by the multiplier, rounded up.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
task GatherBamFiles {
    # Runs Picard GatherBamFiles over `input_bams`, writing the result to
    # `output_bam_path` together with an index and an MD5 file.
    #
    # Inputs:
    #   preCommand        - optional shell snippet executed before Picard.
    #   input_bams        - non-empty list of BAM files, each passed as INPUT=.
    #   output_bam_path   - path of the BAM to write.
    #   compression_level - optional value for -Dsamjdk.compression_level.
    #   picard_jar        - path to the Picard jar.
    #   memory            - JVM heap in gigabytes (rounded up); defaults to 4.
    #   memoryMultiplier  - factor applied to the heap size for the runtime
    #                       memory request; defaults to 3.0.
    String? preCommand
    Array[File]+ input_bams
    String output_bam_path
    Int? compression_level
    String picard_jar

    Float? memory
    Float? memoryMultiplier

    # Heap size handed to the JVM via -Xmx, in whole gigabytes.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        # Make sure the output directory exists, as MarkDuplicates does.
        mkdir -p $(dirname ${output_bam_path})
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${picard_jar} \
          GatherBamFiles \
          INPUT=${sep=' INPUT=' input_bams} \
          OUTPUT=${output_bam_path} \
          CREATE_INDEX=true \
          CREATE_MD5_FILE=true
    }

    output {
        File output_bam = "${output_bam_path}"
        # The dot is escaped so that only a literal ".bam" suffix is replaced.
        File output_bam_index = sub(output_bam_path, "\\.bam$", ".bai")
        File output_bam_md5 = "${output_bam_path}.md5"
    }

    runtime {
        # Requested memory is the heap size scaled by the multiplier, rounded up.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Mark duplicate reads to avoid counting non-independent observations
task MarkDuplicates {
    # Runs Picard MarkDuplicates over `input_bams`, writing the marked BAM to
    # `output_bam_path` (with an index) and the metrics to `metrics_path`.
    #
    # Inputs:
    #   preCommand        - optional shell snippet executed before Picard.
    #   input_bams        - BAM files, each passed as INPUT=.
    #   output_bam_path   - path of the BAM to write.
    #   metrics_path      - path of the duplication metrics file to write.
    #   compression_level - optional value for -Dsamjdk.compression_level.
    #   picard_jar        - path to the Picard jar.
    #   memory            - JVM heap in gigabytes (rounded up); defaults to 4.
    #   memoryMultiplier  - factor applied to the heap size for the runtime
    #                       memory request; defaults to 3.0.
    String? preCommand
    Array[File] input_bams
    String output_bam_path
    String metrics_path
    Int? compression_level
    String picard_jar
    Float? memory
    Float? memoryMultiplier

    # The program default for READ_NAME_REGEX is appropriate in nearly every case.
    # Sometimes we wish to supply "null" in order to turn off optical duplicate detection.
    # This can be desirable if you don't mind the estimated library size being wrong
    # and optical duplicate detection is taking >7 days and failing.
    String? read_name_regex

    # NOTE(review): an earlier comment here said the task assumes query-grouped
    # input and relies on ASSUME_SORT_ORDER="queryname". The command below does
    # not pass ASSUME_SORT_ORDER, so that assumption looks stale. Confirm the
    # expected sort order of `input_bams` against the callers.

    # Heap size handed to the JVM via -Xmx, in whole gigabytes.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        mkdir -p $(dirname ${output_bam_path})
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${picard_jar} \
          MarkDuplicates \
          INPUT=${sep=' INPUT=' input_bams} \
          OUTPUT=${output_bam_path} \
          METRICS_FILE=${metrics_path} \
          VALIDATION_STRINGENCY=SILENT \
          ${"READ_NAME_REGEX=" + read_name_regex} \
          OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
          CLEAR_DT="false" \
          CREATE_INDEX=true \
          ADD_PG_TAG_TO_READS=false
    }

    output {
        File output_bam = output_bam_path
        # The dot is escaped so that only a literal ".bam" suffix is replaced.
        File output_bam_index = sub(output_bam_path, "\\.bam$", ".bai")
        File duplicate_metrics = metrics_path
    }

    runtime {
        # Requested memory is the heap size scaled by the multiplier, rounded up.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
    # Runs Picard MergeVcfs over `input_vcfs`, writing a single VCF to
    # `output_vcf_path`.
    #
    # Inputs:
    #   preCommand         - optional shell snippet executed before Picard.
    #   input_vcfs         - VCF files, each passed as INPUT=.
    #   input_vcfs_indexes - index files for `input_vcfs`; not referenced by the
    #                        command (presumably declared so they are localized
    #                        next to the VCFs; verify against callers).
    #   output_vcf_path    - path of the merged VCF to write.
    #   compression_level  - optional value for -Dsamjdk.compression_level.
    #   picard_jar         - path to the Picard jar.
    #   memory             - JVM heap in gigabytes (rounded up); defaults to 4.
    #   memoryMultiplier   - factor applied to the heap size for the runtime
    #                        memory request; defaults to 3.0.
    String? preCommand
    Array[File] input_vcfs
    Array[File] input_vcfs_indexes
    String output_vcf_path
    Int? compression_level
    String picard_jar
    Float? memory
    Float? memoryMultiplier

    # Using MergeVcfs instead of GatherVcfs so we can create indices
    # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket

    # Heap size handed to the JVM via -Xmx, in whole gigabytes.
    Int mem = ceil(select_first([memory, 4.0]))

    command {
        set -e -o pipefail
        ${preCommand}
        # Make sure the output directory exists, as MarkDuplicates does.
        mkdir -p $(dirname ${output_vcf_path})
        java ${"-Dsamjdk.compression_level=" + compression_level} \
        -Xmx${mem}G -jar ${picard_jar} \
          MergeVcfs \
          INPUT=${sep=' INPUT=' input_vcfs} \
          OUTPUT=${output_vcf_path}
    }

    output {
        File output_vcf = output_vcf_path
        # NOTE(review): a ".tbi" index is expected next to the output, which
        # presumably requires `output_vcf_path` to end in ".vcf.gz" - confirm.
        File output_vcf_index = output_vcf_path + ".tbi"
    }

    runtime {
        # Requested memory is the heap size scaled by the multiplier, rounded up.
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
    }
}

task SamToFastq {
    String? preCommand
    File inputBam
    String outputRead1
    String? outputRead2
    String? outputUnpaired
    String picard_jar
    Float? memory
    Float? memoryMultiplier
    Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes.
Ruben Vorderman's avatar
Ruben Vorderman committed

    command {
        set -e -o pipefail
        ${preCommand}
        java \
        -Xmx${mem}G \
        -jar ${picard_jar} \
Ruben Vorderman's avatar
Ruben Vorderman committed
        SamToFastq \
Ruben Vorderman's avatar
Ruben Vorderman committed
        I=${inputBam} \
        ${"FASTQ=" + outputRead1} \
        ${"SECOND_END_FASTQ=" + outputRead2} \
        ${"UNPAIRED_FASTQ=" + outputUnpaired}
    }

    output {
        File read1 = outputRead1
        File? read2 = outputRead2
        File? unpairedRead = outputUnpaired
    }

Cats's avatar
Cats committed
    runtime {
        memory: ceil(mem * select_first([memoryMultiplier, 3.0]))
Cats's avatar
Cats committed
    }