Commit fcaea52d authored by Peter van 't Hof
Browse files

Added some Picard tools to Biopet

parent 4deb4b69
package nl.lumc.sasc.biopet.function.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.sting.commandline.{Argument, Input, Output}
/**
 * Command-line wrapper for the Picard main class
 * `net.sf.picard.analysis.directed.CalculateHsMetrics`.
 *
 * Every field below maps onto one `KEY=value` argument; the arguments are
 * appended to whatever command line the [[Picard]] base class produces.
 *
 * @param root the `Configurable` this wrapper is constructed with
 */
class CalculateHsMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.directed.CalculateHsMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: File = _

  @Input(doc = "BAIT_INTERVALS", required = true)
  var baitIntervals: File = _

  @Input(doc = "TARGET_INTERVALS", required = true)
  var targetIntervals: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  @Output(doc = "PER_TARGET_COVERAGE", required = false)
  var perTargetCoverage: File = _

  // Read from the "reference" config key, with "" as the fallback value.
  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false)
  var metricAccumulationLevel: List[String] = config("metricaccumulationlevel", List())

  // Not read from the config; stays unset unless the caller assigns it.
  @Argument(doc = "BAIT_SET_NAME", required = false)
  var baitSetName: String = _

  /** Base Picard command line followed by the tool-specific arguments, in a fixed order. */
  override def commandLine: String = {
    val inherited = super.commandLine
    val toolArgs = Seq(
      required("INPUT=", input, spaceSeparated = false),
      required("OUTPUT=", output, spaceSeparated = false),
      optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false),
      repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false),
      required("BAIT_INTERVALS=", baitIntervals, spaceSeparated = false),
      required("TARGET_INTERVALS=", targetIntervals, spaceSeparated = false),
      optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated = false),
      optional("BAIT_SET_NAME=", baitSetName, spaceSeparated = false)
    )
    inherited + toolArgs.mkString
  }
}
/** Factory for pre-wired [[CalculateHsMetrics]] instances. */
object CalculateHsMetrics {
  /**
   * Builds a `CalculateHsMetrics` whose output files live in `outputDir` and are
   * named after `input` with a trailing ".bam" removed.
   *
   * @param root            passed straight to the `CalculateHsMetrics` constructor
   * @param input           alignment file to analyse
   * @param baitIntervals   bait interval file
   * @param targetIntervals target interval file
   * @param outputDir       directory receiving the ".capmetrics" and
   *                        ".per_target_coverage" files
   * @return the configured instance
   */
  def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File, outputDir: String): CalculateHsMetrics = {
    val job = new CalculateHsMetrics(root)
    job.input = input
    job.baitIntervals = baitIntervals
    job.targetIntervals = targetIntervals

    // Both outputs share the input's file name minus the ".bam" suffix.
    val baseName = input.getName.stripSuffix(".bam")
    job.output = new File(outputDir, baseName + ".capmetrics")
    job.perTargetCoverage = new File(outputDir, baseName + ".per_target_coverage")
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.sting.commandline.{Argument, Input, Output}
/**
 * Command-line wrapper for the Picard main class
 * `net.sf.picard.analysis.CollectGcBiasMetrics`.
 *
 * Fields are rendered as `KEY=value` arguments appended to the command line
 * produced by the [[Picard]] base class.
 *
 * @param root the `Configurable` this wrapper is constructed with
 */
class CollectGcBiasMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.CollectGcBiasMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: Seq[File] = Nil

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  // Defaults to "<output>.pdf" in afterGraph when left unset.
  @Output(doc = "Output chart", required = false)
  var outputChart: File = _

  @Output(doc = "Output summary", required = false)
  var outputSummary: File = _

  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  @Argument(doc = "Window size", required = false)
  var windowSize: Int = config("windowsize", 100)

  @Argument(doc = "MINIMUM_GENOME_FRACTION", required = false)
  var minGenomeFraction: Double = config("mingenomefraction", 1.0E-5)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  // NOTE(review): field name and config key are spelt "sequinced"; kept as-is
  // because both are visible to callers and existing configs.
  @Argument(doc = "IS_BISULFITE_SEQUENCED", required = false)
  var isBisulfiteSequinced: Boolean = config("isbisulfitesequinced", false)

  /** Fills in the chart output path from `output` when none was assigned. */
  override def afterGraph: Unit = {
    if (outputChart == null) {
      outputChart = new File(output + ".pdf")
    }
    //require(reference.exists)
  }

  /** Base Picard command line followed by the tool-specific arguments, in a fixed order. */
  override def commandLine: String = {
    val inherited = super.commandLine
    val toolArgs = Seq(
      repeat("INPUT=", input, spaceSeparated = false),
      required("OUTPUT=", output, spaceSeparated = false),
      optional("CHART_OUTPUT=", outputChart, spaceSeparated = false),
      required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false),
      optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false),
      optional("WINDOW_SIZE=", windowSize, spaceSeparated = false),
      optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false),
      conditional(assumeSorted, "ASSUME_SORTED=TRUE"),
      conditional(isBisulfiteSequinced, "IS_BISULFITE_SEQUENCED=TRUE")
    )
    inherited + toolArgs.mkString
  }
}
/** Factory for pre-wired [[CollectGcBiasMetrics]] instances. */
object CollectGcBiasMetrics {
  /**
   * Builds a `CollectGcBiasMetrics` for a single input file.
   *
   * @param root      passed straight to the `CollectGcBiasMetrics` constructor
   * @param input     alignment file appended to the job's input list
   * @param outputDir directory receiving "<input name without .bam>.gcbiasmetrics"
   * @return the configured instance
   */
  def apply(root: Configurable, input: File, outputDir: String): CollectGcBiasMetrics = {
    val job = new CollectGcBiasMetrics(root)
    job.input = job.input :+ input
    val metricsName = input.getName.stripSuffix(".bam") + ".gcbiasmetrics"
    job.output = new File(outputDir, metricsName)
    job
  }
}
package nl.lumc.sasc.biopet.function.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.sting.commandline.{Argument, Input, Output}
/**
 * Command-line wrapper for the Picard main class
 * `net.sf.picard.analysis.CollectInsertSizeMetrics`.
 *
 * Fields are rendered as `KEY=value` arguments appended to the command line
 * produced by the [[Picard]] base class.
 *
 * @param root the `Configurable` this wrapper is constructed with
 */
class CollectInsertSizeMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.CollectInsertSizeMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  // Defaults to "<output>.pdf" in afterGraph when left unset.
  @Output(doc = "Output histogram", required = true)
  var outputHistogram: File = _

  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  @Argument(doc = "DEVIATIONS", required = false)
  var deviations: Double = config("deviations", 10.0)

  @Argument(doc = "MINIMUM_PCT", required = false)
  var minPct: Double = config("minpct", 0.05)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  // Reads its own "stopafter" key. It previously read "metricaccumulationlevel",
  // the key belonging to the list-valued field declared below.
  @Argument(doc = "STOP_AFTER", required = false)
  var stopAfter: Long = config("stopafter", 0)

  @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false)
  var metricAccumulationLevel: List[String] = config("metricaccumulationlevel", List())

  @Argument(doc = "HISTOGRAM_WIDTH", required = false)
  var histogramWidth: Int = config("histogramWidth", 0)

  /** Fills in the histogram output path from `output` when none was assigned. */
  override def afterGraph: Unit = {
    if (outputHistogram == null) outputHistogram = new File(output + ".pdf")
    //require(reference.exists)
  }

  /**
   * Base Picard command line followed by the tool-specific arguments.
   *
   * `STOP_AFTER` and `HISTOGRAM_WIDTH` are emitted only for values greater than
   * zero. `MINIMUM_PCT` is always emitted so a configured `minpct` takes effect.
   */
  override def commandLine = super.commandLine +
    required("INPUT=", input, spaceSeparated = false) +
    required("OUTPUT=", output, spaceSeparated = false) +
    optional("HISTOGRAM_FILE=", outputHistogram, spaceSeparated = false) +
    required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) +
    optional("DEVIATIONS=", deviations, spaceSeparated = false) +
    optional("MINIMUM_PCT=", minPct, spaceSeparated = false) +
    repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false) +
    (if (stopAfter > 0) optional("STOP_AFTER=", stopAfter, spaceSeparated = false) else "") +
    (if (histogramWidth > 0) optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated = false) else "") +
    conditional(assumeSorted, "ASSUME_SORTED=TRUE")
}
/** Factory for pre-wired [[CollectInsertSizeMetrics]] instances. */
object CollectInsertSizeMetrics {
  /**
   * Builds a `CollectInsertSizeMetrics` for one input file.
   *
   * Only `input` and `output` are assigned here; the histogram output is left unset.
   *
   * @param root      passed straight to the `CollectInsertSizeMetrics` constructor
   * @param input     alignment file to analyse
   * @param outputDir directory receiving "<input name without .bam>.insertsizemetrics"
   * @return the configured instance
   */
  def apply(root: Configurable, input: File, outputDir: String): CollectInsertSizeMetrics = {
    val job = new CollectInsertSizeMetrics(root)
    job.input = input
    val metricsName = input.getName.stripSuffix(".bam") + ".insertsizemetrics"
    job.output = new File(outputDir, metricsName)
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.sting.commandline.{Argument, Input, Output}
/**
 * Command-line wrapper for the Picard main class `net.sf.picard.sam.MarkDuplicates`.
 *
 * Fields are rendered as `KEY=value` arguments appended to the command line
 * produced by the [[Picard]] base class.
 *
 * @param root the `Configurable` this wrapper is constructed with
 */
class MarkDuplicates(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.sam.MarkDuplicates"

  /** Value stored under `key` when the config contains it, otherwise `null`. */
  private def configOrNull(key: String): String =
    if (configContains(key)) config(key) else null

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: List[File] = Nil

  @Output(doc = "The output file to bam file to", required = true)
  var output: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var outputMetrics: File = _

  @Argument(doc = "PROGRAM_RECORD_ID", required = false)
  var programRecordId: String = configOrNull("programrecordid")

  @Argument(doc = "PROGRAM_GROUP_VERSION", required = false)
  var programGroupVersion: String = configOrNull("programgroupversion")

  @Argument(doc = "PROGRAM_GROUP_COMMAND_LINE", required = false)
  var programGroupCommandLine: String = configOrNull("programgroupcommandline")

  @Argument(doc = "PROGRAM_GROUP_NAME", required = false)
  var programGroupName: String = configOrNull("programgroupname")

  @Argument(doc = "COMMENT", required = false)
  var comment: String = configOrNull("comment")

  @Argument(doc = "REMOVE_DUPLICATES", required = false)
  var removeDuplicates: Boolean = config("removeduplicates", false)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  @Argument(doc = "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP", required = false)
  var maxSequencesForDiskReadEndsMap: Int = config("maxSequencesForDiskReadEndsMap", 50000)

  @Argument(doc = "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP", required = false)
  var maxFileHandlesForReadEndsMap: Int = config("maxFileHandlesForReadEndsMap", 8000)

  @Argument(doc = "SORTING_COLLECTION_SIZE_RATIO", required = false)
  var sortingCollectionSizeRatio: Double = config("sortingCollectionSizeRatio", 0.25)

  @Argument(doc = "READ_NAME_REGEX", required = false)
  var readNameRegex: String = configOrNull("readNameRegex")

  @Argument(doc = "OPTICAL_DUPLICATE_PIXEL_DISTANCE", required = false)
  var opticalDuplicatePixelDistance: Int = config("opticalDuplicatePixelDistance", 100)

  /**
   * Base Picard command line followed by the tool-specific arguments, in a fixed order.
   *
   * The four numeric tuning options are emitted only for values greater than zero.
   */
  override def commandLine: String = {
    val inherited = super.commandLine
    val toolArgs = Seq(
      repeat("INPUT=", input, spaceSeparated = false),
      required("OUTPUT=", output, spaceSeparated = false),
      required("METRICS_FILE=", outputMetrics, spaceSeparated = false),
      optional("PROGRAM_RECORD_ID=", programRecordId, spaceSeparated = false),
      optional("PROGRAM_GROUP_VERSION=", programGroupVersion, spaceSeparated = false),
      optional("PROGRAM_GROUP_COMMAND_LINE=", programGroupCommandLine, spaceSeparated = false),
      optional("PROGRAM_GROUP_NAME=", programGroupName, spaceSeparated = false),
      optional("COMMENT=", comment, spaceSeparated = false),
      conditional(removeDuplicates, "REMOVE_DUPLICATES=TRUE"),
      conditional(assumeSorted, "ASSUME_SORTED=TRUE"),
      if (maxSequencesForDiskReadEndsMap > 0)
        optional("MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=", maxSequencesForDiskReadEndsMap, spaceSeparated = false)
      else "",
      if (maxFileHandlesForReadEndsMap > 0)
        optional("MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=", maxFileHandlesForReadEndsMap, spaceSeparated = false)
      else "",
      if (sortingCollectionSizeRatio > 0)
        optional("SORTING_COLLECTION_SIZE_RATIO=", sortingCollectionSizeRatio, spaceSeparated = false)
      else "",
      optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated = false),
      if (opticalDuplicatePixelDistance > 0)
        optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated = false)
      else ""
    )
    inherited + toolArgs.mkString
  }
}
/** Factories for pre-wired [[MarkDuplicates]] instances. */
object MarkDuplicates {
  /**
   * Builds a `MarkDuplicates` whose outputs live in `outputDir` and are named
   * after the first input file with a trailing ".bam" removed.
   *
   * @param root      passed straight to the `MarkDuplicates` constructor
   * @param input     alignment files to process; must contain at least one file
   * @param outputDir directory receiving the ".dedup.bam" and ".dedup.metrics" files
   * @return the configured instance
   * @throws IllegalArgumentException if `input` is empty
   */
  def apply(root: Configurable, input: List[File], outputDir: String): MarkDuplicates = {
    // Output names derive from the first input, so an empty list cannot be handled.
    require(input.nonEmpty, "MarkDuplicates needs at least one input file to derive its output names")
    val baseName = input.head.getName.stripSuffix(".bam")

    val markDuplicates = new MarkDuplicates(root)
    markDuplicates.input = input
    markDuplicates.output = new File(outputDir, baseName + ".dedup.bam")
    markDuplicates.outputMetrics = new File(outputDir, baseName + ".dedup.metrics")
    markDuplicates
  }

  /**
   * Builds a `MarkDuplicates` writing to an explicit `output` file; the metrics
   * file is placed next to it, named after `output` with ".bam" replaced by ".metrics".
   *
   * @param root   passed straight to the `MarkDuplicates` constructor
   * @param input  alignment files to process
   * @param output destination BAM file
   * @return the configured instance
   */
  def apply(root: Configurable, input: List[File], output: File): MarkDuplicates = {
    val markDuplicates = new MarkDuplicates(root)
    markDuplicates.input = input
    markDuplicates.output = output
    markDuplicates.outputMetrics = new File(output.getParent, output.getName.stripSuffix(".bam") + ".metrics")
    markDuplicates
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.picard
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import org.broadinstitute.sting.commandline._
/**
 * Common base for the Picard tool wrappers in this package.
 *
 * Declares the options shared by every tool, each read from the config with
 * "picard" passed as the third argument, and appends them to the command line
 * of `BiopetJavaCommandLineFunction`.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: String = config("verbosity", "INFO", "picard")

  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", false, "picard")

  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: String = config("validationstringency", "STRICT", "picard")

  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Int = config("compressionlevel", 5, "picard")

  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Int = config("maxrecordsinram", 500000, "picard")

  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", true, "picard")

  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", false, "picard")

  /** Command whose output is matched against `versionRegex`: the tool invoked with "-h". */
  override def versionCommand = Seq(executeble, javaOpts, javaExecutable, "-h").mkString(" ")

  override val versionRegex = """Version: (.*)""".r

  // Both 0 and 1 are listed as acceptable exit codes for the version command.
  override val versionExitcode = List(0, 1)

  override val defaultVmem = "8G"
  memoryLimit = Option(5.0)

  /** Parent command line followed by the shared Picard arguments, in a fixed order. */
  override def commandLine: String = {
    val inherited = super.commandLine
    val sharedArgs = Seq(
      required("TMP_DIR=" + jobTempDir),
      optional("VERBOSITY=", verbosity, spaceSeparated = false),
      conditional(quiet, "QUIET=TRUE"),
      optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
      optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
      optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
      conditional(createIndex, "CREATE_INDEX=TRUE"),
      conditional(createMd5, "CREATE_MD5_FILE=TRUE")
    )
    inherited + sharedArgs.mkString
  }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment