version 1.0

# Run Picard CollectMultipleMetrics on a single BAM file.
#
# "PROGRAM=null" is passed first (presumably to clear Picard's default program
# list - confirm) and every collector whose Boolean input is true is then
# added back with its own "PROGRAM=..." argument.
#
# Inputs:
#   preCommand        optional shell snippet executed before Picard.
#   bamFile           BAM passed as I=.
#   refFasta          reference passed as R=.
#   bamIndex, refDict, refFastaIndex
#                     not referenced in the command; presumably declared so
#                     that they are localized next to their primary files.
#   basename          output prefix passed as O=; its parent directory is
#                     created before Picard runs.
#   collect*/qualityScoreDistribution/meanQualityByCycle
#                     switches for the individual metrics programs.
#   picardJar         when defined Picard runs as "java -jar <picardJar>",
#                     otherwise the "picard" executable is used.
#   memory            JVM heap size for -Xmx, in G.
#   memoryMultiplier  factor applied to memory for the runtime request.
#
# NOTE(review): every output is a non-optional File, so switching a collector
# off presumably makes output evaluation fail for the files it would have
# written - confirm before disabling any of the Boolean inputs.
task CollectMultipleMetrics {
    input {
        String? preCommand
        File bamFile
        File bamIndex
        File refFasta
        File refDict
        File refFastaIndex
        String basename

        Boolean collectAlignmentSummaryMetrics = true
        Boolean collectInsertSizeMetrics = true
        Boolean qualityScoreDistribution = true
        Boolean meanQualityByCycle = true
        Boolean collectBaseDistributionByCycle = true
        Boolean collectGcBiasMetrics = true
        #Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
        Boolean collectSequencingArtifactMetrics = true
        Boolean collectQualityYieldMetrics = true

        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        mkdir -p "$(dirname "~{basename}")"
        ~{preCommand}
        ~{toolCommand} \
        CollectMultipleMetrics \
        I=~{bamFile} \
        R=~{refFasta} \
        O=~{basename} \
        PROGRAM=null \
        ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
        ~{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
        ~{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
        ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
        ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
        ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
        ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" collectSequencingArtifactMetrics} \
        ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
    }

    output {
        File alignmentSummary = basename + ".alignment_summary_metrics"
        File baitBiasDetail = basename + ".bait_bias_detail_metrics"
        File baitBiasSummary = basename + ".bait_bias_summary_metrics"
        File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics"
        File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf"
        File errorSummary = basename + ".error_summary_metrics"
        File gcBiasDetail = basename + ".gc_bias.detail_metrics"
        File gcBiasPdf = basename + ".gc_bias.pdf"
        File gcBiasSummary = basename + ".gc_bias.summary_metrics"
        File insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf"
        File insertSize = basename + ".insert_size_metrics"
        File preAdapterDetail = basename + ".pre_adapter_detail_metrics"
        File preAdapterSummary = basename + ".pre_adapter_summary_metrics"
        File qualityByCycle = basename + ".quality_by_cycle_metrics"
        File qualityByCyclePdf = basename + ".quality_by_cycle.pdf"
        File qualityDistribution = basename + ".quality_distribution_metrics"
        File qualityDistributionPdf = basename + ".quality_distribution.pdf"
        File qualityYield = basename + ".quality_yield_metrics"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Run Picard CollectRnaSeqMetrics on a single BAM file.
#
# Inputs:
#   preCommand         optional shell snippet executed before Picard.
#   bamFile            BAM passed as I=.
#   bamIndex           not referenced in the command; presumably declared so
#                      that it is localized next to the BAM.
#   refRefflat         annotation file passed as REF_FLAT=.
#   basename           output prefix; its parent directory is created first.
#   strandSpecificity  value passed as STRAND_SPECIFICITY= (default "NONE").
#   picardJar          when defined Picard runs as "java -jar <picardJar>",
#                      otherwise the "picard" executable is used.
#   memory             JVM heap size for -Xmx, in G.
#   memoryMultiplier   factor applied to memory for the runtime request.
#
# Outputs:
#   chart    <basename>.RNA_Metrics.pdf
#   metrics  <basename>.RNA_Metrics
task CollectRnaSeqMetrics {
    input {
        String? preCommand
        File bamFile
        File bamIndex
        File refRefflat
        String basename
        String strandSpecificity = "NONE"

        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        mkdir -p "$(dirname "~{basename}")"
        ~{preCommand}
        ~{toolCommand} \
        CollectRnaSeqMetrics \
        I=~{bamFile} \
        O=~{basename}.RNA_Metrics \
        CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \
        STRAND_SPECIFICITY=~{strandSpecificity} \
        REF_FLAT=~{refRefflat}
    }

    output {
        File chart = basename + ".RNA_Metrics.pdf"
        File metrics = basename + ".RNA_Metrics"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Run Picard CollectTargetedPcrMetrics on a single BAM file.
#
# Inputs:
#   preCommand         optional shell snippet executed before Picard.
#   bamFile            BAM passed as I=.
#   refFasta           reference passed as R=.
#   bamIndex, refDict, refFastaIndex
#                      not referenced in the command; presumably declared so
#                      that they are localized next to their primary files.
#   ampliconIntervals  passed as AMPLICON_INTERVALS=.
#   targetIntervals    non-empty list; each entry becomes its own
#                      TARGET_INTERVALS= argument.
#   basename           output prefix; its parent directory is created first.
#   picardJar          when defined Picard runs as "java -jar <picardJar>",
#                      otherwise the "picard" executable is used.
#   memory             JVM heap size for -Xmx, in G.
#   memoryMultiplier   factor applied to memory for the runtime request.
#
# Outputs:
#   perTargetCoverage  <basename>.targetPcrPerTargetCoverage
#   perBaseCoverage    <basename>.targetPcrPerBaseCoverage
#   metrics            <basename>.targetPcrMetrics
task CollectTargetedPcrMetrics {
    input {
        String? preCommand
        File bamFile
        File bamIndex
        File refFasta
        File refDict
        File refFastaIndex
        File ampliconIntervals
        Array[File]+ targetIntervals
        String basename

        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        mkdir -p "$(dirname "~{basename}")"
        ~{preCommand}
        ~{toolCommand} \
        CollectTargetedPcrMetrics \
        I=~{bamFile} \
        R=~{refFasta} \
        AMPLICON_INTERVALS=~{ampliconIntervals} \
        TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \
        O=~{basename}.targetPcrMetrics \
        PER_BASE_COVERAGE=~{basename}.targetPcrPerBaseCoverage \
        PER_TARGET_COVERAGE=~{basename}.targetPcrPerTargetCoverage
    }

    output {
        File perTargetCoverage = basename + ".targetPcrPerTargetCoverage"
        File perBaseCoverage = basename + ".targetPcrPerBaseCoverage"
        File metrics = basename + ".targetPcrMetrics"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
#
# Inputs:
#   preCommand         optional shell snippet executed before Picard.
#   input_bams         non-empty list; each entry becomes its own INPUT=
#                      argument, in the order given.
#   output_bam_path    path passed as OUTPUT=; its parent directory is created.
#   compression_level  when defined, forwarded as COMPRESSION_LEVEL=.
#   picardJar          when defined Picard runs as "java -jar <picardJar>",
#                      otherwise the "picard" executable is used.
#   memory             JVM heap size for -Xmx, in G.
#   memoryMultiplier   factor applied to memory for the runtime request.
#
# Outputs:
#   output_bam        the gathered BAM at output_bam_path.
#   output_bam_index  output_bam_path with a trailing ".bam" replaced by ".bai"
#                     (requested through CREATE_INDEX=true).
#   output_bam_md5    output_bam_path + ".md5" (CREATE_MD5_FILE=true).
task GatherBamFiles {
    input {
        String? preCommand
        Array[File]+ input_bams
        String output_bam_path
        Int? compression_level
        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        mkdir -p "$(dirname "~{output_bam_path}")"
        ~{toolCommand} \
        GatherBamFiles \
        INPUT=~{sep=' INPUT=' input_bams} \
        OUTPUT=~{output_bam_path} \
        ~{"COMPRESSION_LEVEL=" + compression_level} \
        CREATE_INDEX=true \
        CREATE_MD5_FILE=true
    }

    output {
        File output_bam = "~{output_bam_path}"
        # The dot is escaped so only a literal ".bam" suffix is rewritten.
        File output_bam_index = sub(output_bam_path, "\\.bam$", ".bai")
        File output_bam_md5 = "~{output_bam_path}.md5"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Mark duplicate reads to avoid counting non-independent observations
#
# Inputs:
#   preCommand         optional shell snippet executed before Picard.
#   input_bams         each entry becomes its own INPUT= argument.
#   output_bam_path    path passed as OUTPUT=; its parent directory is created.
#   metrics_path       path passed as METRICS_FILE=; its parent directory is
#                      created as well.
#   compression_level  when defined, forwarded as COMPRESSION_LEVEL=.
#   read_name_regex    when defined, forwarded as READ_NAME_REGEX=.
#   picardJar          when defined Picard runs as "java -jar <picardJar>",
#                      otherwise the "picard" executable is used.
#   memory             JVM heap size for -Xmx, in G.
#   memoryMultiplier   factor applied to memory for the runtime request.
#
# Outputs:
#   output_bam         the BAM at output_bam_path.
#   output_bam_index   output_bam_path with a trailing ".bam" replaced by ".bai"
#                      (requested through CREATE_INDEX=true).
#   duplicate_metrics  the file at metrics_path.
task MarkDuplicates {
    input {
        String? preCommand
        Array[File] input_bams
        String output_bam_path
        String metrics_path
        Int? compression_level
        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0

        # The program default for READ_NAME_REGEX is appropriate in nearly every case.
        # Sometimes we wish to supply "null" in order to turn off optical duplicate detection
        # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
        String? read_name_regex
    }

    # NOTE(review): an earlier comment here said the task assumes query-sorted
    # input and relies on ASSUME_SORT_ORDER="queryname", but the command below
    # does not pass ASSUME_SORT_ORDER. Confirm which sort order callers supply
    # before adding that argument.

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        mkdir -p "$(dirname "~{output_bam_path}")" "$(dirname "~{metrics_path}")"
        ~{toolCommand} \
        MarkDuplicates \
        INPUT=~{sep=' INPUT=' input_bams} \
        OUTPUT=~{output_bam_path} \
        METRICS_FILE=~{metrics_path} \
        VALIDATION_STRINGENCY=SILENT \
        ~{"READ_NAME_REGEX=" + read_name_regex} \
        ~{"COMPRESSION_LEVEL=" + compression_level} \
        OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
        CLEAR_DT="false" \
        CREATE_INDEX=true \
        ADD_PG_TAG_TO_READS=false
    }

    output {
        File output_bam = output_bam_path
        # The dot is escaped so only a literal ".bam" suffix is rewritten.
        File output_bam_index = sub(output_bam_path, "\\.bam$", ".bai")
        File duplicate_metrics = metrics_path
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
#
# Inputs:
#   preCommand        optional shell snippet executed before Picard.
#   inputVCFs         each entry becomes its own INPUT= argument.
#   inputVCFsIndexes  not referenced in the command; presumably declared so
#                     that the indexes are localized next to the VCFs.
#   outputVCFpath     path passed as OUTPUT=.
#   compressionLevel  when defined, forwarded as COMPRESSION_LEVEL=.
#   picardJar         when defined Picard runs as "java -jar <picardJar>",
#                     otherwise the "picard" executable is used.
#   memory            JVM heap size for -Xmx, in G.
#   memoryMultiplier  factor applied to memory for the runtime request.
#
# Outputs:
#   outputVCF       the merged file at outputVCFpath.
#   outputVCFindex  outputVCFpath + ".tbi".
#                   NOTE(review): presumably only written for compressed
#                   (.vcf.gz) output - confirm against callers.
task MergeVCFs {
    input {
        String? preCommand
        Array[File] inputVCFs
        Array[File] inputVCFsIndexes
        String outputVCFpath
        Int? compressionLevel
        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Using MergeVcfs instead of GatherVcfs so we can create indices
    # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        MergeVcfs \
        INPUT=~{sep=' INPUT=' inputVCFs} \
        ~{"COMPRESSION_LEVEL=" + compressionLevel} \
        OUTPUT=~{outputVCFpath}
    }

    output {
        File outputVCF = outputVCFpath
        File outputVCFindex = outputVCFpath + ".tbi"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Convert a BAM file to FASTQ with Picard SamToFastq.
#
# Inputs:
#   preCommand        optional shell snippet executed before Picard.
#   inputBam          BAM passed as I=.
#   outputRead1       path passed as FASTQ=.
#   outputRead2       when defined, passed as SECOND_END_FASTQ=.
#   outputUnpaired    when defined, passed as UNPAIRED_FASTQ=.
#   picardJar         when defined Picard runs as "java -jar <picardJar>",
#                     otherwise the "picard" executable is used.
#   memory            JVM heap size for -Xmx, in G (default 16).
#   memoryMultiplier  factor applied to memory for the runtime request.
#
# Outputs:
#   read1         the file at outputRead1.
#   read2         the file at outputRead2, when that input is defined.
#   unpairedRead  the file at outputUnpaired, when that input is defined.
task SamToFastq {
    input {
        String? preCommand
        File inputBam
        String outputRead1
        String? outputRead2
        String? outputUnpaired

        String? picardJar
        Int memory = 16 # High memory default to avoid crashes.
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        SamToFastq \
        I=~{inputBam} \
        ~{"FASTQ=" + outputRead1} \
        ~{"SECOND_END_FASTQ=" + outputRead2} \
        ~{"UNPAIRED_FASTQ=" + outputUnpaired}
    }

    output {
        File read1 = outputRead1
        File? read2 = outputRead2
        File? unpairedRead = outputUnpaired
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Split an interval list into several smaller lists with Picard
# IntervalListTools.
#
# Inputs:
#   preCommand        optional shell snippet executed before Picard.
#   interval_list     file passed as INPUT=.
#   scatter_count     value passed as SCATTER_COUNT=.
#   picardJar         when defined Picard runs as "java -jar <picardJar>",
#                     otherwise the "picard" executable is used.
#   memory            JVM heap size for -Xmx, in G.
#   memoryMultiplier  factor applied to memory for the runtime request.
#
# Outputs:
#   out             every file matching scatter_list/*/*.interval_list.
#   interval_count  the number of files in that glob.
task ScatterIntervalList {
    input {
        String? preCommand
        File interval_list
        Int scatter_count
        String? picardJar

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        mkdir scatter_list
        ~{toolCommand} \
        IntervalListTools \
        SCATTER_COUNT=~{scatter_count} \
        SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
        UNIQUE=true \
        SORT=true \
        INPUT=~{interval_list} \
        OUTPUT=scatter_list
    }

    output {
        Array[File] out = glob("scatter_list/*/*.interval_list")
        # Counted from the produced files: nothing in the command prints the
        # count to stdout, so read_int(stdout()) had no integer to parse.
        Int interval_count = length(glob("scatter_list/*/*.interval_list"))
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}

# Sort one or more VCF files into a single output with Picard SortVcf.
#
# Inputs:
#   preCommand        optional shell snippet executed before Picard.
#   picardJar         when defined Picard runs as "java -jar <picardJar>",
#                     otherwise the "picard" executable is used.
#   vcfFiles          non-empty list; each entry becomes its own I= argument.
#   outputVcf         path passed as O=.
#   sequenceDict      when defined, passed as SEQUENCE_DICTIONARY=.
#   memory            JVM heap size for -Xmx, in G.
#   memoryMultiplier  factor applied to memory for the runtime request.
#
# Outputs:
#   vcfFile   the file at outputVcf.
#   vcfIndex  outputVcf + ".tbi".
#             NOTE(review): presumably only written for compressed (.vcf.gz)
#             output - confirm against callers.
task SortVcf {
    input {
        String? preCommand
        String? picardJar

        Array[File]+ vcfFiles
        String outputVcf
        File? sequenceDict

        Int memory = 4
        Float memoryMultiplier = 3.0
    }

    # Use the supplied jar when there is one, otherwise the "picard" wrapper.
    String toolCommand = if defined(picardJar)
        then "java -Xmx" + memory + "G -jar " + picardJar
        else "picard -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        SortVcf \
        I=~{sep=" I=" vcfFiles} \
        ~{"SEQUENCE_DICTIONARY=" + sequenceDict} \
        O=~{outputVcf}
    }

    output {
        File vcfFile = outputVcf
        File vcfIndex = outputVcf + ".tbi"
    }

    runtime {
        # JVM heap size scaled by memoryMultiplier and rounded up.
        memory: ceil(memory * memoryMultiplier)
    }
}