diff --git a/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 28625a91bc38c157c1bf8aad1a0fdb18e1021595..93055136e8cb5d89a0fa5840d72302e24b8af1e1 100644 --- a/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -2,63 +2,63 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats import org.broadinstitute.gatk.queue.QScript -import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import java.io.File -import nl.lumc.sasc.biopet.core.apps.{BedToInterval, BiopetFlagstat} +import nl.lumc.sasc.biopet.core.apps.{ BedToInterval, BiopetFlagstat } import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.function.bedtools.{BedtoolsCoverage, BedtoolsIntersect} -import nl.lumc.sasc.biopet.function.picard.{CollectInsertSizeMetrics, CollectGcBiasMetrics, CalculateHsMetrics, CollectAlignmentSummaryMetrics} +import nl.lumc.sasc.biopet.function.bedtools.{ BedtoolsCoverage, BedtoolsIntersect } +import nl.lumc.sasc.biopet.function.picard.{ CollectInsertSizeMetrics, CollectGcBiasMetrics, CalculateHsMetrics, CollectAlignmentSummaryMetrics } import nl.lumc.sasc.biopet.function.samtools.SamtoolsFlagstat -class BamMetrics(val root:Configurable) extends QScript with BiopetQScript { +class BamMetrics(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - - @Input(doc="Bam File", shortName="BAM",required=true) + + @Input(doc = "Bam File", shortName = "BAM", required = true) var inputBam: File = _ - - @Input(doc="Bed tracks targets", shortName="target",required=false) + + @Input(doc = "Bed tracks targets", shortName = "target", required = false) var bedFiles: List[File] = Nil - - @Input(doc="Bed tracks bait", shortName="bait",required=false) + + @Input(doc = "Bed tracks bait", shortName = "bait", required = false) var baitBedFile: File = _ - - @Argument(doc="",required=false) + + @Argument(doc = "", required = false) var wholeGenome = false - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) if (outputDir == null) throw new IllegalStateException("Missing Output directory on BamMetrics module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { add(SamtoolsFlagstat(this, inputBam, outputDir)) add(BiopetFlagstat(this, inputBam, outputDir)) add(CollectGcBiasMetrics(this, inputBam, outputDir)) add(CollectInsertSizeMetrics(this, inputBam, outputDir)) add(CollectAlignmentSummaryMetrics(this, inputBam, outputDir)) - + val baitIntervalFile = if (baitBedFile != null) new File(outputDir, baitBedFile.getName.stripSuffix(".bed") + ".interval") else null if (baitIntervalFile != null) add(BedToInterval(this, baitBedFile, inputBam, outputDir), true) - + for (bedFile <- bedFiles) { val targetDir = outputDir + bedFile.getName.stripSuffix(".bed") + "/" val targetInterval = BedToInterval(this, bedFile, inputBam, targetDir) add(targetInterval, true) add(CalculateHsMetrics(this, inputBam, if (baitIntervalFile != null) baitIntervalFile - else targetInterval.output, targetInterval.output, targetDir)) - + else targetInterval.output, targetInterval.output, targetDir)) + val strictOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam") - add(BedtoolsIntersect(this, inputBam, bedFile, strictOutputBam, minOverlap = config("strictintersectoverlap", default=1.0)), true) + add(BedtoolsIntersect(this, inputBam, bedFile, strictOutputBam, minOverlap = config("strictintersectoverlap", default = 1.0)), true) add(SamtoolsFlagstat(this, strictOutputBam)) add(BiopetFlagstat(this, strictOutputBam, targetDir)) - + val looseOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam") - add(BedtoolsIntersect(this, inputBam, bedFile, looseOutputBam, minOverlap = config("looseintersectoverlap", default=0.01)), true) + add(BedtoolsIntersect(this, inputBam, bedFile, looseOutputBam, minOverlap = config("looseintersectoverlap", default = 0.01)), true) add(SamtoolsFlagstat(this, looseOutputBam)) add(BiopetFlagstat(this, looseOutputBam, targetDir)) - + val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage") add(BedtoolsCoverage(this, inputBam, bedFile, coverageFile, true), true) add(CoverageStats(this, coverageFile, targetDir)) @@ -68,12 +68,12 @@ class BamMetrics(val root:Configurable) extends QScript with BiopetQScript { object BamMetrics extends PipelineCommand { override val pipeline = "/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.class" - - def apply(root:Configurable, bamFile:File, outputDir:String) : BamMetrics = { + + def apply(root: Configurable, bamFile: File, outputDir: String): BamMetrics = { val bamMetrics = new BamMetrics(root) bamMetrics.inputBam = bamFile bamMetrics.outputDir = outputDir - + bamMetrics.init bamMetrics.biopetScript return bamMetrics diff --git a/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala b/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala index 8cfa8d885f7d290e3de73fd705275af8465174f3..053eb84924e8a0167c5be0f3f1b21440113e1590 100644 --- a/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala +++ b/bam-metrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala @@ -2,27 +2,27 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class CoverageStats(val root:Configurable) extends PythonCommandLineFunction { +class CoverageStats(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("bedtools_cov_stats.py") - - @Input(doc="Input file") + + @Input(doc = "Input file") var input: File = _ - - @Output(doc="output File") + + @Output(doc = "output File") var output: File = _ - - @Output(doc="plot File (png)") + + @Output(doc = "plot File (png)") var plot: File = _ - + def cmdLine = getPythonCommand + required(input) + required("--plot", plot) + " > " + required(output) } object CoverageStats { - def apply(root:Configurable, input:File, outputDir:String) : CoverageStats = { + def apply(root: Configurable, input: File, outputDir: String): CoverageStats = { val coverageStats = new CoverageStats(root) coverageStats.input = input coverageStats.output = new File(outputDir, input.getName + ".stats") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index 3f2853280821f3f831e973f933463b386df72324..e1f61225eb9356e19b8ad933c32b2191e6a7adbe 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -1,6 +1,6 @@ package nl.lumc.sasc.biopet.core -abstract class BiopetCommandLineFunction extends BiopetCommandLineFunctionTrait { +abstract class BiopetCommandLineFunction extends BiopetCommandLineFunctionTrait { protected def cmdLine: String final def commandLine: String = { preCmdInternal diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 819035be7defa4b22ef68fb0000ebbfcf419ceb5..df70599f3d7943b41b49d8e507d5b5755bf9e8cb 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -10,45 +10,45 @@ import scala.util.matching.Regex trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable { analysisName = getClass.getSimpleName - - @Input(doc="deps", required=false) + + @Input(doc = "deps", required = false) var deps: List[File] = Nil - - @Argument(doc="Threads", required=false) + + @Argument(doc = "Threads", required = false) var threads = 0 val defaultThreads = 1 - - @Argument(doc="Vmem", required=false) + + @Argument(doc = "Vmem", required = false) var vmem: String = _ val defaultVmem: String = "" - - @Argument(doc="Executable") + + @Argument(doc = "Executable") var executable: String = _ - + protected def beforeCmd { } - + protected def afterGraph { } - + override def freezeFieldValues() { checkExecutable afterGraph - jobOutputFile = new File(firstOutput.getParent + "/." + firstOutput.getName + "." + analysisName + ".out") - + jobOutputFile = new File(firstOutput.getParent + "/." + firstOutput.getName + "." + analysisName + ".out") + if (threads == 0) threads = getThreads(defaultThreads) if (threads > 1) nCoresRequest = Option(threads) - + if (vmem == null) { vmem = config("vmem") if (vmem == null && !defaultVmem.isEmpty) vmem = defaultVmem } if (vmem != null) jobResourceRequests :+= "h_vmem=" + vmem jobName = this.analysisName + ":" + firstOutput.getName - + super.freezeFieldValues() } - + protected def checkExecutable { try if (executable != null) { val buffer = new StringBuffer() @@ -66,22 +66,22 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe) } } - + final protected def preCmdInternal { checkExecutable //for (input <- this.inputs) if (!input.exists) throw new IllegalStateException("Input: " + input + " for " + analysisName + " is missing") //logger.debug("Config for " + analysisName + ": " + localConfig) - + beforeCmd - + addJobReportBinding("cores", if (nCoresRequest.get.toInt > 0) nCoresRequest.get.toInt else 1) addJobReportBinding("version", getVersion) } - + protected def versionCommand: String = null protected val versionRegex: Regex = null protected val versionExitcode = List(0) // Can select multiple - def getVersion : String = { + def getVersion: String = { if (versionCommand == null || versionRegex == null) return "N/A" val buffer = new StringBuffer() val process = Process(versionCommand).run(ProcessLogger(buffer append _)) @@ -91,25 +91,25 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab } val lines = versionCommand lines_! ProcessLogger(buffer append _) for (line <- lines) { - line match { + line match { case versionRegex(m) => return m - case _ => + case _ => } } logger.warn("Version command: '" + versionCommand + "' give a exit code " + process.exitValue + " but no version was found, executable oke?") return "N/A" } - - def getThreads(default:Int) : Int = { - val maxThreads: Int = config("maxthreads", default=8) - val threads: Int = config("threads", default=default) + + def getThreads(default: Int): Int = { + val maxThreads: Int = config("maxthreads", default = 8) + val threads: Int = config("threads", default = default) if (maxThreads > threads) return threads else return maxThreads } - - def getThreads(default:Int, module:String) : Int = { - val maxThreads: Int = config("maxthreads", default=8, submodule=module) - val threads: Int = config("threads", default=default, submodule=module) + + def getThreads(default: Int, module: String): Int = { + val maxThreads: Int = config("maxthreads", default = 8, submodule = module) + val threads: Int = config("threads", default = default, submodule = module) if (maxThreads > threads) return threads else return maxThreads } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala index 543d9450474b4eace2b36b737315fa02b7f6e3c9..8b691ed507792c5429bb7ced63ad569e6720d04b 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala @@ -4,12 +4,12 @@ import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction abstract class BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetCommandLineFunctionTrait { executable = "java" - + override def commandLine: String = { preCmdInternal val cmd = super.commandLine val finalCmd = executable + cmd.substring(cmd.indexOf(" ")) -// addJobReportBinding("command", cmd) + // addJobReportBinding("command", cmd) return cmd } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala index 2dcae9518e178d2254b91cc8144580d821e3f218..b7527d4bf5734cea82d562c7c27695024d122d89 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala @@ -32,7 +32,7 @@ import org.broadinstitute.gatk.queue.util._ import org.broadinstitute.gatk.queue.QCommandPlugin import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScriptManager -import org.broadinstitute.gatk.queue.engine.{QStatusMessenger, QGraphSettings, QGraph} +import org.broadinstitute.gatk.queue.engine.{ QStatusMessenger, QGraphSettings, QGraph } import collection.JavaConversions._ import org.broadinstitute.gatk.utils.classloader.PluginManager import org.broadinstitute.gatk.utils.exceptions.UserException @@ -40,7 +40,7 @@ import org.broadinstitute.gatk.utils.io.IOUtils import org.broadinstitute.gatk.utils.help.ApplicationDetails import java.io.FileOutputStream import java.net.URL -import java.util.{ResourceBundle, Arrays} +import java.util.{ ResourceBundle, Arrays } import org.broadinstitute.gatk.utils.text.TextFormattingUtils import org.apache.commons.io.FilenameUtils @@ -84,11 +84,11 @@ object BiopetQCommandLine extends Logging { * Entry point of Queue. Compiles and runs QScripts passed in to the command line. */ class BiopetQCommandLine extends CommandLineProgram with Logging { - @Input(fullName="script", shortName="S", doc="QScript scala file", required=false) + @Input(fullName = "script", shortName = "S", doc = "QScript scala file", required = false) @ClassType(classOf[File]) var scripts: Seq[File] = Nil var pipelineName: String = _ - + @ArgumentCollection val settings = new QGraphSettings @@ -155,8 +155,7 @@ class BiopetQCommandLine extends CommandLineProgram with Logging { // TODO: Default command plugin argument? val remoteFileConverter = ( for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null)) - yield commandPlugin.remoteFileConverter - ).headOption.getOrElse(null) + yield commandPlugin.remoteFileConverter).headOption.getOrElse(null) if (remoteFileConverter != null) loadArgumentsIntoObject(remoteFileConverter) @@ -267,37 +266,35 @@ class BiopetQCommandLine extends CommandLineProgram with Logging { override def getArgumentTypeDescriptors = Arrays.asList(new ScalaCompoundArgumentTypeDescriptor) - override def getApplicationDetails : ApplicationDetails = { + override def getApplicationDetails: ApplicationDetails = { new ApplicationDetails(createQueueHeader(), - Seq.empty[String], - ApplicationDetails.createDefaultRunningInstructions(getClass.asInstanceOf[Class[CommandLineProgram]]), - "") + Seq.empty[String], + ApplicationDetails.createDefaultRunningInstructions(getClass.asInstanceOf[Class[CommandLineProgram]]), + "") } - private def createQueueHeader() : Seq[String] = { + private def createQueueHeader(): Seq[String] = { Seq(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp), - "Copyright (c) 2012 The Broad Institute", - "For support and documentation go to http://www.broadinstitute.org/gatk") + "Copyright (c) 2012 The Broad Institute", + "For support and documentation go to http://www.broadinstitute.org/gatk") } - private def getQueueVersion : String = { - val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText") + private def getQueueVersion: String = { + val stingResources: ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText") - if ( stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version") ) { + if (stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version")) { stingResources.getString("org.broadinstitute.sting.queue.QueueVersion.version") - } - else { + } else { "" } } - private def getBuildTimestamp : String = { - val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText") + private def getBuildTimestamp: String = { + val stingResources: ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText") - if ( stingResources.containsKey("build.timestamp") ) { + if (stingResources.containsKey("build.timestamp")) { stingResources.getString("build.timestamp") - } - else { + } else { "" } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 17ee7e47c035e6363d1a997a3f06267b50480c15..9a1accb31301e49d64144d0a4554e3512a5eb9b3 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -8,19 +8,19 @@ import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.function.QFunction trait BiopetQScript extends Configurable { - @Argument(doc="Config Json file",shortName="config", required=false) + @Argument(doc = "Config Json file", shortName = "config", required = false) val configfiles: List[File] = Nil - - @Argument(doc="Output directory", shortName="outputDir", required=true) + + @Argument(doc = "Output directory", shortName = "outputDir", required = true) var outputDir: String = _ - - var outputFiles:Map[String,File] = Map() - + + var outputFiles: Map[String, File] = Map() + var qSettings: QSettings - + def init def biopetScript - + final def script() { for (file <- configfiles) globalConfig.loadConfigFile(file) if (!outputDir.endsWith("/")) outputDir += "/" @@ -33,12 +33,11 @@ trait BiopetQScript extends Configurable { writer.close() for (line <- configReport.split("\n")) logger.debug(line) } - + def add(functions: QFunction*) // Gets implemeted at org.broadinstitute.sting.queue.QScript - def add(function: QFunction, isIntermediate:Boolean = false) { + def add(function: QFunction, isIntermediate: Boolean = false) { function.isIntermediate = isIntermediate add(function) } - - + } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 5c161afd9a625c26c248c4a942837049f17d686e..8ceefb2af128a2686df0d1ed94091c9715fe87a5 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -4,33 +4,34 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable trait MultiSampleQScript extends BiopetQScript { - var samples:Map[String,Any] = Map() - - final def runSamplesJobs : Map[String,Map[String,File]] = { - var output: Map[String,Map[String,File]] = Map() + var samples: Map[String, Any] = Map() + + final def runSamplesJobs: Map[String, Map[String, File]] = { + var output: Map[String, Map[String, File]] = Map() samples = config("samples") if (samples == null) samples = Map() - if (globalConfig.contains("samples")) for ((key,value) <- samples) { + if (globalConfig.contains("samples")) for ((key, value) <- samples) { var sample = Configurable.any2map(value) if (!sample.contains("ID")) sample += ("ID" -> key) if (sample("ID") == key) { - var files:Map[String,List[File]] = runSingleSampleJobs(sample) + var files: Map[String, List[File]] = runSingleSampleJobs(sample) } else logger.warn("Key is not the same as ID on value for sample") - } else logger.warn("No Samples found in config") + } + else logger.warn("No Samples found in config") return output } - - def runSingleSampleJobs(sampleConfig:Map[String,Any]) : Map[String,List[File]] - def runSingleSampleJobs(sample:String) : Map[String,List[File]] ={ + + def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] + def runSingleSampleJobs(sample: String): Map[String, List[File]] = { return runSingleSampleJobs(Configurable.any2map(samples(sample))) } - - final def runRunsJobs(sampleConfig:Map[String,Any]) : Map[String,Map[String,File]] = { - var output: Map[String,Map[String,File]] = Map() + + final def runRunsJobs(sampleConfig: Map[String, Any]): Map[String, Map[String, File]] = { + var output: Map[String, Map[String, File]] = Map() val sampleID = sampleConfig("ID") if (sampleConfig.contains("runs")) { val runs = Configurable.any2map(sampleConfig("runs")) - for ((key,value) <- runs) { + for ((key, value) <- runs) { var run = Configurable.any2map(value) if (!run.contains("ID")) run += ("ID" -> key) if (run("ID") == key) { @@ -40,5 +41,5 @@ trait MultiSampleQScript extends BiopetQScript { } else logger.warn("No runs found in config for sample: " + sampleID) return output } - def runSingleRunJobs(runConfig:Map[String,Any], sampleConfig:Map[String,Any]) : Map[String,File] + def runSingleRunJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala index 786dcd4cb87468adaffcf58c6609bec98c2d5e96..5bb506aa3b94060d2f78536c568b9f65c1d73afe 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala @@ -11,17 +11,17 @@ import java.io.FileOutputStream import org.broadinstitute.gatk.queue.util.Logging trait PipelineCommand extends Logging { -// val src = "" -// val extension = ".scala" + // val src = "" + // val extension = ".scala" val pipeline = "" - + def main(args: Array[String]): Unit = { -// val tempFile = java.io.File.createTempFile(src + ".", extension) -// val is = getClass.getResourceAsStream(src + extension) -// val os = new FileOutputStream(tempFile) -// org.apache.commons.io.IOUtils.copy(is, os) -// os.close() - + // val tempFile = java.io.File.createTempFile(src + ".", extension) + // val is = getClass.getResourceAsStream(src + extension) + // val os = new FileOutputStream(tempFile) + // org.apache.commons.io.IOUtils.copy(is, os) + // os.close() + var argv: Array[String] = Array() //argv ++= Array("-S", tempFile.getAbsolutePath) argv ++= Array("-S", pipeline) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BedToInterval.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BedToInterval.scala index 6674154bab52e23c574d9b449a4c477f6ef090b4..f5082a7e15b8a8b7a21eaa346062c370e8c1abb8 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BedToInterval.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BedToInterval.scala @@ -5,58 +5,58 @@ import htsjdk.samtools.SAMSequenceRecord import java.io.File import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.PrintWriter import scala.io.Source -class BedToInterval(val root:Configurable) extends BiopetJavaCommandLineFunction { +class BedToInterval(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName - - @Input(doc="Input Bed file", required = true) + + @Input(doc = "Input Bed file", required = true) var input: File = _ - - @Input(doc="Bam File", required = true) - var bamFile:File = _ - - @Output(doc="Output interval list", required = true) + + @Input(doc = "Bam File", required = true) + var bamFile: File = _ + + @Output(doc = "Output interval list", required = true) var output: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - + override def commandLine = super.commandLine + required(input) + required(bamFile) + required(output) } object BedToInterval { - def apply(root:Configurable, inputBed:File, inputBam:File, outputDir:String) : BedToInterval = { + def apply(root: Configurable, inputBed: File, inputBam: File, outputDir: String): BedToInterval = { val bedToInterval = new BedToInterval(root) bedToInterval.input = inputBed bedToInterval.bamFile = inputBam bedToInterval.output = new File(outputDir, inputBed.getName.stripSuffix(".bed") + ".interval") return bedToInterval } - - def apply(root:Configurable, inputBed:File, inputBam:File, output:File) : BedToInterval = { + + def apply(root: Configurable, inputBed: File, inputBam: File, output: File): BedToInterval = { val bedToInterval = new BedToInterval(root) bedToInterval.input = inputBed bedToInterval.bamFile = inputBam bedToInterval.output = output return bedToInterval } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val writer = new PrintWriter(args(2)) - + val inputSam = new SAMFileReader(new File(args(1))) for (bla <- inputSam.getFileHeader.getSequenceDictionary.getSequences.toArray) { val record = bla.asInstanceOf[SAMSequenceRecord] writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n") } inputSam.close - + val bedFile = Source.fromFile(args(0)) for (line <- bedFile.getLines) { val split = line.split("\t") @@ -76,7 +76,7 @@ object BedToInterval { for (t <- 3 until split.length) writer.write(":" + split(t)) writer.write("\n") } - + writer.close() } } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BiopetFlagstat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BiopetFlagstat.scala index ad282ead2fe01419a793c18e91f8594dc68d7e0f..0e5921777aa588fc648c70715b616da39f506b6e 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BiopetFlagstat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/BiopetFlagstat.scala @@ -2,38 +2,38 @@ package nl.lumc.sasc.biopet.core.apps import htsjdk.samtools.SAMFileReader import htsjdk.samtools.SAMRecord -import java.io.{BufferedInputStream, File, FileInputStream, PrintWriter} +import java.io.{ BufferedInputStream, File, FileInputStream, PrintWriter } import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import scala.io.Source import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.queue.util.Logging import scala.collection.JavaConversions._ import scala.collection.mutable.Map -class BiopetFlagstat(val root:Configurable) extends BiopetJavaCommandLineFunction { +class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName - - @Input(doc="Input bam", shortName = "input", required = true) + + @Input(doc = "Input bam", shortName = "input", required = true) var input: File = _ - - @Output(doc="Output flagstat", shortName="output", required = true) + + @Output(doc = "Output flagstat", shortName = "output", required = true) var output: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - + override def commandLine = super.commandLine + required(input) + " > " + required(output) } object BiopetFlagstat extends Logging { - def apply(root:Configurable, input:File, output:File) : BiopetFlagstat = { + def apply(root: Configurable, input: File, output: File): BiopetFlagstat = { val flagstat = new BiopetFlagstat(root) flagstat.input = input flagstat.output = output return flagstat } - def apply(root:Configurable, input:File, outputDir:String) : BiopetFlagstat = { + def apply(root: Configurable, input: File, outputDir: String): BiopetFlagstat = { val flagstat = new BiopetFlagstat(root) flagstat.input = input flagstat.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat") @@ -44,70 +44,66 @@ object BiopetFlagstat extends Logging { */ def main(args: Array[String]): Unit = { val inputSam = new SAMFileReader(new File(args(0))) - + val flagstatCollector = new FlagstatCollector flagstatCollector.loadDefaultFunctions val m = 10 val max = 60 for (t <- 0 to (max / m)) - flagstatCollector.addFunction("MAPQ>"+(t * m), record => record.getMappingQuality > (t * m)) + flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m)) flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => { if (record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && - ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart)) - ) true + ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true else false }) flagstatCollector.addFunction("First normal, second read normal", record => { if (record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && - ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart)) - ) true + ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true else false }) flagstatCollector.addFunction("First inverted, second read inverted", record => { if (record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && - ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart)) - ) true + ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true else false }) flagstatCollector.addFunction("First inverted, second read normal", record => { if (record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && - ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || - (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart)) - ) true + ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || + (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart))) true else false }) flagstatCollector.addFunction("Mate in same strand", record => record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag && - record.getReferenceIndex == record.getMateReferenceIndex) + record.getReferenceIndex == record.getMateReferenceIndex) flagstatCollector.addFunction("Mate on other chr", record => record.getReferenceIndex != record.getMateReferenceIndex) - + for (record <- inputSam.iterator) { if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0) System.err.println("Reads prosessed: " + flagstatCollector.readsCount) flagstatCollector.loadRecord(record) } - + println(flagstatCollector.report) } - - class FlagstatCollector { + + class FlagstatCollector { private var functionCount = 0 var readsCount = 0 private val names: Map[Int, String] = Map() private var functions: Array[SAMRecord => Boolean] = Array() private var totalCounts: Array[Long] = Array() - private var crossCounts = Array.ofDim[Int](1,1) - + private var crossCounts = Array.ofDim[Int](1, 1) + def loadDefaultFunctions { addFunction("All", record => true) addFunction("Mapped", record => !record.getReadUnmappedFlag) @@ -116,7 +112,7 @@ object BiopetFlagstat extends Logging { addFunction("SecondOfPair", record => record.getSecondOfPairFlag) addFunction("ReadNegativeStrand", record => record.getReadNegativeStrandFlag) - + addFunction("NotPrimaryAlignment", record => record.getNotPrimaryAlignmentFlag) addFunction("ReadPaired", record => record.getReadPairedFlag) @@ -129,8 +125,8 @@ object BiopetFlagstat extends Logging { addFunction("SupplementaryAlignment", record => record.getSupplementaryAlignmentFlag) addFunction("SecondaryOrSupplementary", record => record.isSecondaryOrSupplementary) } - - def loadRecord(record:SAMRecord) { + + def loadRecord(record: SAMRecord) { readsCount += 1 val values: Array[Boolean] = new Array(names.size) for (t <- 0 until names.size) { @@ -147,57 +143,57 @@ object BiopetFlagstat extends Logging { } } } - - def addFunction(name:String, function:SAMRecord => Boolean) { + + def addFunction(name: String, function: SAMRecord => Boolean) { functionCount += 1 - crossCounts = Array.ofDim[Int](functionCount,functionCount) + crossCounts = Array.ofDim[Int](functionCount, functionCount) totalCounts = new Array[Long](functionCount) val temp = new Array[SAMRecord => Boolean](functionCount) for (t <- 0 until (temp.size - 1)) temp(t) = functions(t) functions = temp - + val index = functionCount - 1 names += (index -> name) functions(index) = function totalCounts(index) = 0 } - + def report: String = { val buffer = new StringBuilder buffer.append("Number\tTotal Flags\tFraction\tName\n") for (t <- 0 until names.size) { val precentage = (totalCounts(t).toFloat / readsCount) * 100 - buffer.append("#" + (t+1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n") + buffer.append("#" + (t + 1) + "\t" + totalCounts(t) + "\t" + f"$precentage%.4f" + "%\t" + names(t) + "\n") } buffer.append("\n") - + buffer.append(crossReport() + "\n") buffer.append(crossReport(fraction = true) + "\n") - + return buffer.toString } - - def crossReport(fraction:Boolean = false): String = { + + def crossReport(fraction: Boolean = false): String = { val buffer = new StringBuilder - + for (t <- 0 until names.size) // Header for table - buffer.append("\t#" + (t+1)) + buffer.append("\t#" + (t + 1)) buffer.append("\n") - + for (t <- 0 until names.size) { - buffer.append("#" + (t+1) + "\t") + buffer.append("#" + (t + 1) + "\t") for (t2 <- 0 until names.size) { val reads = crossCounts(t)(t2) if (fraction) { val precentage = (reads.toFloat / totalCounts(t).toFloat) * 100 buffer.append(f"$precentage%.4f" + "%") } else buffer.append(reads) - if (t2 == names.size-1) buffer.append("\n") + if (t2 == names.size - 1) buffer.append("\n") else buffer.append("\t") } } return buffer.toString } } // End of class - + } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/FastqSplitter.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/FastqSplitter.scala index 40f77e68bcbf79f51784d8c29f985d6ae32e00a9..62e701af774fecd7156533a0be2089b8f39af27e 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/FastqSplitter.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/apps/FastqSplitter.scala @@ -1,24 +1,24 @@ package nl.lumc.sasc.biopet.core.apps -import java.io.{BufferedInputStream, File, FileInputStream, PrintWriter} +import java.io.{ BufferedInputStream, File, FileInputStream, PrintWriter } import java.util.zip.GZIPInputStream import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import scala.io.Source import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -class FastqSplitter(val root:Configurable) extends BiopetJavaCommandLineFunction { +class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName - - @Input(doc="Input fastq", shortName = "input", required = true) + + @Input(doc = "Input fastq", shortName = "input", required = true) var input: File = _ - - @Output(doc="Output fastq files", shortName="output", required = true) + + @Output(doc = "Output fastq files", shortName = "output", required = true) var output: List[File] = Nil - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - + override def commandLine = super.commandLine + required(input) + repeat(output) } @@ -29,13 +29,13 @@ object FastqSplitter { def main(args: Array[String]): Unit = { val groupsize = 100 val input = new File(args.head) - val output:Array[PrintWriter] = new Array[PrintWriter](args.tail.size) - for (t <- 1 to args.tail.size) output(t-1) = new PrintWriter(args(t)) + val output: Array[PrintWriter] = new Array[PrintWriter](args.tail.size) + for (t <- 1 to args.tail.size) output(t - 1) = new PrintWriter(args(t)) val inputStream = { if (input.getName.endsWith(".gz") || input.getName.endsWith(".gzip")) Source.fromInputStream( new GZIPInputStream( new BufferedInputStream( - new FileInputStream(input)))).bufferedReader + new FileInputStream(input)))).bufferedReader else Source.fromFile(input).bufferedReader } while (inputStream.ready) { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala index 79defc15adab62e201379ace506f5c6ea5364d90..92ea25fc98265435cdac9a86de9a07fb97de5d61 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala @@ -6,13 +6,13 @@ import org.broadinstitute.gatk.queue.util.Logging import argonaut._, Argonaut._ import scalaz._, Scalaz._ -class Config(var map: Map[String,Any]) extends Logging { +class Config(var map: Map[String, Any]) extends Logging { logger.debug("Init phase of config") def this() = { this(Map()) loadDefaultConfig() } - + def loadDefaultConfig() { var globalFile: String = System.getenv("BIOPET_CONFIG") if (globalFile != null) { @@ -23,8 +23,8 @@ class Config(var map: Map[String,Any]) extends Logging { } else logger.warn("BIOPET_CONFIG value found but file does not exist, no global config is loaded") } else logger.info("BIOPET_CONFIG value not found, no global config is loaded") } - - def loadConfigFile(configFile:File) { + + def loadConfigFile(configFile: File) { logger.debug("Jsonfile: " + configFile) val jsonText = scala.io.Source.fromFile(configFile).mkString val json = Parse.parseOption(jsonText) @@ -34,13 +34,13 @@ class Config(var map: Map[String,Any]) extends Logging { if (configJson == None) { throw new IllegalStateException("The config JSON file is either not properly formatted or not a JSON file, file: " + configFile) } - + if (map.isEmpty) map = configJson else map = Config.mergeMaps(configJson, map) logger.debug("New config: " + map) } - - private def jsonToMap(json:Json) : Map[String, Any] = { + + private def jsonToMap(json: Json): Map[String, Any] = { var output: Map[String, Any] = Map() if (json.isObject) { for (key <- json.objectFieldsOrEmpty) { @@ -50,13 +50,13 @@ class Config(var map: Map[String,Any]) extends Logging { } else return null return output } - - private def jsonToAny(json:Json): Any = { + + private def jsonToAny(json: Json): Any = { if (json.isObject) return jsonToMap(json) else if (json.isArray) { - var list:List[Any] = List() - for (value <- json.objectValues.get) list ::= jsonToAny(value) - return list + var list: List[Any] = List() + for (value <- json.objectValues.get) list ::= jsonToAny(value) + return list } else if (json.isBool) return json.bool.get else if (json.isString) return json.string.get.toString else if (json.isNumber) { @@ -65,17 +65,17 @@ class Config(var map: Map[String,Any]) extends Logging { else return num.toLong } else throw new IllegalStateException("Config value type not supported, value: " + json) } - - def getMap() : Map[String,Any] = map - + + def getMap(): Map[String, Any] = map + var notFoundCache: List[ConfigValueIndex] = List() - var foundCache: Map[ConfigValueIndex,ConfigValue] = Map() - var defaultCache: Map[ConfigValueIndex,ConfigValue] = Map() - - def contains(s:String) : Boolean = map.contains(s) - def contains(requestedIndex:ConfigValueIndex) : Boolean = contains(requestedIndex.module, requestedIndex.path, requestedIndex.key) - def contains(module:String, path: List[String], key:String) : Boolean = { - val requestedIndex = ConfigValueIndex(module,path,key) + var foundCache: Map[ConfigValueIndex, ConfigValue] = Map() + var defaultCache: Map[ConfigValueIndex, ConfigValue] = Map() + + def contains(s: String): Boolean = map.contains(s) + def contains(requestedIndex: ConfigValueIndex): Boolean = contains(requestedIndex.module, requestedIndex.path, requestedIndex.key) + def contains(module: String, path: List[String], key: String): Boolean = { + val requestedIndex = ConfigValueIndex(module, path, key) if (notFoundCache.contains(requestedIndex)) return false else if (foundCache.contains(requestedIndex)) return true else { @@ -86,14 +86,14 @@ class Config(var map: Map[String,Any]) extends Logging { val p = getMapFromPath(submodules2 ::: module :: Nil) //logger.debug("p: " + p) if (p.contains(key)) { - foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module,submodules2,key), p(key))) + foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module, submodules2, key), p(key))) return true } val p2 = getMapFromPath(submodules2) //logger.debug("p2: " + p2) if (p2.contains(key)) { - foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module,submodules2,key), p2(key))) + foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module, submodules2, key), p2(key))) return true } submodules2 = submodules2.init @@ -102,10 +102,10 @@ class Config(var map: Map[String,Any]) extends Logging { } val p = getMapFromPath(module :: Nil) if (p.contains(key)) { // Module is not nested - foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module,Nil,key), p(key))) + foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex(module, Nil, key), p(key))) return true } else if (this.contains(key)) { // Root value of json - foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex("",Nil,key), get(key))) + foundCache += (requestedIndex -> ConfigValue.apply(requestedIndex, ConfigValueIndex("", Nil, key), get(key))) return true } else { // At this point key is not found on the path notFoundCache +:= requestedIndex @@ -113,47 +113,47 @@ class Config(var map: Map[String,Any]) extends Logging { } } } - - private def get(key:String) : Any = map(key) - private def get(key:String, default:Any) : Any = if (contains(key)) get(key) else default - - def apply(module:String, path: List[String], key:String, default:Any) : ConfigValue = { - val requestedIndex = ConfigValueIndex(module,path,key) + + private def get(key: String): Any = map(key) + private def get(key: String, default: Any): Any = if (contains(key)) get(key) else default + + def apply(module: String, path: List[String], key: String, default: Any): ConfigValue = { + val requestedIndex = ConfigValueIndex(module, path, key) if (contains(requestedIndex)) return foundCache(requestedIndex) else { defaultCache += (requestedIndex -> ConfigValue.apply(requestedIndex, null, default, true)) return defaultCache(requestedIndex) } } - - def apply(module:String, path: List[String], key:String) : ConfigValue = { - val requestedIndex = ConfigValueIndex(module,path,key) + + def apply(module: String, path: List[String], key: String): ConfigValue = { + val requestedIndex = ConfigValueIndex(module, path, key) if (contains(requestedIndex)) return foundCache(requestedIndex) else { logger.error("Value in config could not be found but it seems required, index: " + requestedIndex) throw new IllegalStateException("Value in config could not be found but it seems required, index: " + requestedIndex) } } - - private def getMapFromPath(path: List[String]) : Map[String,Any] = { - var returnMap: Map[String,Any] = map + + private def getMapFromPath(path: List[String]): Map[String, Any] = { + var returnMap: Map[String, Any] = map for (m <- path) { if (!returnMap.contains(m)) return Map() else returnMap = Config.valueToMap(returnMap(m)) } return returnMap } - + def getReport: String = { - var output:StringBuilder = new StringBuilder + var output: StringBuilder = new StringBuilder output.append("Config report, sorted on module:\n") - var modules:Map[String,StringBuilder] = Map() - for ((key,value) <- foundCache) { + var modules: Map[String, StringBuilder] = Map() + for ((key, value) <- foundCache) { val module = key.module if (!modules.contains(module)) modules += (module -> new StringBuilder) modules(module).append("Found: " + value.toString + "\n") } - for ((key,value) <- defaultCache) { + for ((key, value) <- defaultCache) { val module = key.module if (!modules.contains(module)) modules += (module -> new StringBuilder) modules(module).append("Default used: " + value.toString + "\n") @@ -163,36 +163,36 @@ class Config(var map: Map[String,Any]) extends Logging { if (!modules.contains(module)) modules += (module -> new StringBuilder) if (!defaultCache.contains(value)) modules(module).append("Not Found: " + value.toString + "\n") } - for ((key,value) <- modules) { + for ((key, value) <- modules) { output.append("Config options for module: " + key + "\n") output.append(value.toString) output.append("\n") } return output.toString } - - override def toString() : String = map.toString + + override def toString(): String = map.toString } -object Config { - def valueToMap(input:Any) : Map[String,Any] = { +object Config { + def valueToMap(input: Any): Map[String, Any] = { input match { - case m:Map[_, _] => return m.asInstanceOf[Map[String,Any]] - case _ => throw new IllegalStateException("Value '" + input + "' is not an Map") + case m: Map[_, _] => return m.asInstanceOf[Map[String, Any]] + case _ => throw new IllegalStateException("Value '" + input + "' is not an Map") } } - - def mergeMaps(map1:Map[String,Any],map2:Map[String,Any]) : Map[String,Any] = { - var newMap: Map[String,Any] = Map() + + def mergeMaps(map1: Map[String, Any], map2: Map[String, Any]): Map[String, Any] = { + var newMap: Map[String, Any] = Map() for (key <- map1.keySet.++(map2.keySet)) { if (map1.contains(key) && !map2.contains(key)) newMap += (key -> map1(key)) else if (!map1.contains(key) && map2.contains(key)) newMap += (key -> map2(key)) else if (map1.contains(key) && map2.contains(key)) { - map1(key) match { - case m1:Map[_,_] => { + map1(key) match { + case m1: Map[_, _] => { map2(key) match { - case m2:Map[_,_] => newMap += (key -> mergeMaps(Config.valueToMap(m1),Config.valueToMap(m2))) - case _ => newMap += (key -> map1(key)) + case m2: Map[_, _] => newMap += (key -> mergeMaps(Config.valueToMap(m1), Config.valueToMap(m2))) + case _ => newMap += (key -> map1(key)) } } case _ => newMap += (key -> map1(key)) @@ -201,6 +201,6 @@ object Config { } return newMap } - - def mergeConfigs(config1:Config,config2:Config) : Config = new Config(mergeMaps(config1.getMap, config2.getMap)) + + def mergeConfigs(config1: Config, config2: Config): Config = new Config(mergeMaps(config1.getMap, config2.getMap)) } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala index 3963a683f69a4a5d44f1cb8576d49f3bc6379747..aa43f9bb4484e4bd9241bf2797d8d4d6149bfa2c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala @@ -1,29 +1,29 @@ package nl.lumc.sasc.biopet.core.config -class ConfigValue(val requestIndex:ConfigValueIndex, val foundIndex:ConfigValueIndex, val value:Any, val default: Boolean) { +class ConfigValue(val requestIndex: ConfigValueIndex, val foundIndex: ConfigValueIndex, val value: Any, val default: Boolean) { def getString = Configurable.any2string(value) def getInt = Configurable.any2int(value) def getDouble = Configurable.any2double(value) def getList = Configurable.any2list(value) def getMap = Configurable.any2map(value) - + override def toString: String = { var output = "key = " + requestIndex.key output += ", value = " + value output += ", requestIndex = (" + requestIndex + ")" if (foundIndex == null && !default) output += ", found on root of config" - else if(!default) output += ", foundIndex = (" + foundIndex + ")" + else if (!default) output += ", foundIndex = (" + foundIndex + ")" else output += ", default value is used" - + return output } } object ConfigValue { - def apply(requestIndex:ConfigValueIndex, foundIndex:ConfigValueIndex, value:Any) = { - new ConfigValue(requestIndex,foundIndex,value, false) + def apply(requestIndex: ConfigValueIndex, foundIndex: ConfigValueIndex, value: Any) = { + new ConfigValue(requestIndex, foundIndex, value, false) } - def apply(requestIndex:ConfigValueIndex, foundIndex:ConfigValueIndex, value:Any, default:Boolean) = { - new ConfigValue(requestIndex,foundIndex,value, default) + def apply(requestIndex: ConfigValueIndex, foundIndex: ConfigValueIndex, value: Any, default: Boolean) = { + new ConfigValue(requestIndex, foundIndex, value, default) } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala index d67cacfa133f6dd18c3d151a9faa398225d9f662..fadf6bae6ac2fb077e88b95f4d23ea59cfe93231 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala @@ -1,13 +1,13 @@ package nl.lumc.sasc.biopet.core.config -class ConfigValueIndex(val module:String, val path:List[String], val key:String) { +class ConfigValueIndex(val module: String, val path: List[String], val key: String) { override def toString = "Module = " + module + ", path = " + path + ", key = " + key } object ConfigValueIndex { private var cache: Map[(String, List[String], String), ConfigValueIndex] = Map() - - def apply(module:String, path:List[String], key:String) : ConfigValueIndex = { + + def apply(module: String, path: List[String], key: String): ConfigValueIndex = { if (!cache.contains(module, path, key)) cache += ((module, path, key) -> new ConfigValueIndex(module, path, key)) return cache(module, path, key) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala index 90b6462a91e7afe136ce8ea96f2435f2690c497c..01d1e55e1378b815f8fbf7d82a10aa4a59b58f31 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala @@ -9,8 +9,8 @@ trait Configurable extends Logging { val configPath: List[String] = if (root != null) root.configFullPath else List() protected val configName = getClass.getSimpleName.toLowerCase protected val configFullPath = configName :: configPath - - def config(key:String, default:Any = null, submodule:String = null, required:Boolean = false): ConfigValue = { + + def config(key: String, default: Any = null, submodule: String = null, required: Boolean = false): ConfigValue = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath if (!configContains(key, submodule) && default == null) { @@ -24,113 +24,113 @@ trait Configurable extends Logging { } //def config(key:String, default:Any) = globalConfig(configName, configPath, key, default) //def config(key:String, default:Any, module:String) = globalConfig(module, configName :: configPath, key, default) - + //def configContains(key:String) = globalConfig.contains(configName, configPath, key) - def configContains(key:String, submodule:String = null) = { + def configContains(key: String, submodule: String = null) = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath - + globalConfig.contains(m, p, key) } - - implicit def configValue2file(value:ConfigValue): File = if (value != null) new File(Configurable.any2string(value.value)) else null - implicit def configValue2string(value:ConfigValue): String = if (value != null) Configurable.any2string(value.value) else null - implicit def configValue2long(value:ConfigValue): Long = if (value != null) Configurable.any2long(value.value) else 0 - implicit def configValue2optionLong(value:ConfigValue): Option[Long] = if (value != null) Option(Configurable.any2long(value.value)) else None - implicit def configValue2int(value:ConfigValue): Int = if (value != null) Configurable.any2int(value.value) else 0 - implicit def configValue2optionInt(value:ConfigValue): Option[Int] = if (value != null) Option(Configurable.any2int(value.value)) else None - implicit def configValue2double(value:ConfigValue): Double = if (value != null) Configurable.any2double(value.value) else 0 - implicit def configValue2optionDouble(value:ConfigValue): Option[Double] = if (value != null) Option(Configurable.any2double(value.value)) else None - implicit def configValue2boolean(value:ConfigValue): Boolean = if (value != null) Configurable.any2boolean(value.value) else false - implicit def configValue2optionBoolean(value:ConfigValue): Option[Boolean] = if (value != null) Option(Configurable.any2boolean(value.value)) else None - implicit def configValue2list(value:ConfigValue): List[Any] = if (value != null) Configurable.any2list(value.value) else null - implicit def configValue2stringList(value:ConfigValue): List[String] = if (value != null) Configurable.any2stringList(value.value) else null - implicit def configValue2stringSet(value:ConfigValue): Set[String] = if (value != null) Configurable.any2stringList(value.value).toSet else null - implicit def configValue2map(value:ConfigValue): Map[String, Any] = if (value != null) Configurable.any2map(value.value) else null + + implicit def configValue2file(value: ConfigValue): File = if (value != null) new File(Configurable.any2string(value.value)) else null + implicit def configValue2string(value: ConfigValue): String = if (value != null) Configurable.any2string(value.value) else null + implicit def configValue2long(value: ConfigValue): Long = if (value != null) Configurable.any2long(value.value) else 0 + implicit def configValue2optionLong(value: ConfigValue): Option[Long] = if (value != null) Option(Configurable.any2long(value.value)) else None + implicit def configValue2int(value: ConfigValue): Int = if (value != null) Configurable.any2int(value.value) else 0 + implicit def configValue2optionInt(value: ConfigValue): Option[Int] = if (value != null) Option(Configurable.any2int(value.value)) else None + implicit def configValue2double(value: ConfigValue): Double = if (value != null) Configurable.any2double(value.value) else 0 + implicit def configValue2optionDouble(value: ConfigValue): Option[Double] = if (value != null) Option(Configurable.any2double(value.value)) else None + implicit def configValue2boolean(value: ConfigValue): Boolean = if (value != null) Configurable.any2boolean(value.value) else false + implicit def configValue2optionBoolean(value: ConfigValue): Option[Boolean] = if (value != null) Option(Configurable.any2boolean(value.value)) else None + implicit def configValue2list(value: ConfigValue): List[Any] = if (value != null) Configurable.any2list(value.value) else null + implicit def configValue2stringList(value: ConfigValue): List[String] = if (value != null) Configurable.any2stringList(value.value) else null + implicit def configValue2stringSet(value: ConfigValue): Set[String] = if (value != null) Configurable.any2stringList(value.value).toSet else null + implicit def configValue2map(value: ConfigValue): Map[String, Any] = if (value != null) Configurable.any2map(value.value) else null } object Configurable extends Logging { - def any2string(any:Any) : String = { + def any2string(any: Any): String = { if (any == null) return null any match { - case s:String => return s - case _ => return any.toString + case s: String => return s + case _ => return any.toString } } - - def any2int(any:Any) : Int = { + + def any2int(any: Any): Int = { any match { - case i:Double => return i.toInt - case i:Int => return i - case i:String => { + case i: Double => return i.toInt + case i: Int => return i + case i: String => { logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") return i.toInt } case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } - - def any2long(any:Any) : Long = { + + def any2long(any: Any): Long = { any match { - case l:Double => return l.toLong - case l:Int => return l.toLong - case l:Long => return l - case l:String => { + case l: Double => return l.toLong + case l: Int => return l.toLong + case l: Long => return l + case l: String => { logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") return l.toLong } case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } - - def any2double(any:Any) : Double = { + + def any2double(any: Any): Double = { any match { - case d:Double => return d - case d:Int => return d.toDouble - case d:String => { + case d: Double => return d + case d: Int => return d.toDouble + case d: String => { logger.warn("Value '" + any + "' is a string insteadof int in json file, trying auto convert") return d.toInt } case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } - - def any2boolean(any:Any) : Boolean = { + + def any2boolean(any: Any): Boolean = { any match { - case b:Boolean => return b - case b:String => { + case b: Boolean => return b + case b: String => { logger.warn("Value '" + any + "' is a string insteadof boolean in json file, trying auto convert") return b.contains("true") } - case b:Int => { + case b: Int => { logger.warn("Value '" + any + "' is a int insteadof boolean in json file, trying auto convert") return (b > 0) } case _ => throw new IllegalStateException("Value '" + any + "' is not an boolean") } } - - def any2list(any:Any) : List[Any] = { + + def any2list(any: Any): List[Any] = { if (any == null) return null any match { - case l:List[_] => return l - case s:String => return List(s) - case _ => throw new IllegalStateException("Value '" + any + "' is not an List") + case l: List[_] => return l + case s: String => return List(s) + case _ => throw new IllegalStateException("Value '" + any + "' is not an List") } } - - def any2stringList(any:Any) : List[String] = { + + def any2stringList(any: Any): List[String] = { if (any == null) return null var l: List[String] = Nil - for (v <- any2list(any)) l :+= v.toString - return l + for (v <- any2list(any)) l :+= v.toString + return l } - - def any2map(any:Any) : Map[String,Any] = { + + def any2map(any: Any): Map[String, Any] = { if (any == null) return null any match { - case m:Map[_, _] => return m.asInstanceOf[Map[String,Any]] - case _ => throw new IllegalStateException("Value '" + any + "' is not an Map") + case m: Map[_, _] => return m.asInstanceOf[Map[String, Any]] + case _ => throw new IllegalStateException("Value '" + any + "' is not an Map") } } } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Cat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Cat.scala index 8ce757f9aeca86db5d85395abc1497a27cde9162..ed3bf4ff3976a58b460d3e18fd1669bcbb17de24 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Cat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Cat.scala @@ -5,20 +5,20 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline._ import java.io.File -class Cat(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Input file", required=true) +class Cat(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Input file", required = true) var input: List[File] = Nil - - @Output(doc="Unzipped file", required=true) + + @Output(doc = "Unzipped file", required = true) var output: File = _ - + executable = config("exe", "cat") - + def cmdLine = required(executable) + repeat(input) + " > " + required(output) } object Cat { - def apply(root:Configurable, input:List[File], output:File): Cat = { + def apply(root: Configurable, input: List[File], output: File): Cat = { val cat = new Cat(root) cat.input = input cat.output = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Ln.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Ln.scala index 65e3bfc2135571556ce48797f311081c7d0fb365..d7e9aad056225c2e536945d42a3e7e7c8ce73124 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Ln.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Ln.scala @@ -2,19 +2,19 @@ package nl.lumc.sasc.biopet.function import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.queue.function.InProcessFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File import scala.sys.process.Process -class Ln(val root:Configurable) extends InProcessFunction with Configurable { +class Ln(val root: Configurable) extends InProcessFunction with Configurable { this.analysisName = getClass.getSimpleName - - @Input(doc="Input file") + + @Input(doc = "Input file") var in: File = _ - - @Output(doc="Link destination") + + @Output(doc = "Link destination") var out: File = _ - + override def run { val cmd = "ln -s " + in + " " + out val process = Process(cmd).run diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala index 0ab09e875e2bb905ef2d442bb4ebd5779ec8ead7..f7c4f9a0c083f5e86f5197a91e29812171e6e9b8 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Pbzip2.scala @@ -2,39 +2,39 @@ package nl.lumc.sasc.biopet.function import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class Pbzip2(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Zipped file") +class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Zipped file") var input: File = _ - - @Output(doc="Unzipped file") + + @Output(doc = "Unzipped file") var output: File = _ - - executable = config("exe", default="pbzip2") - + + executable = config("exe", default = "pbzip2") + var decomrpess = true var memory: Option[Int] = config("memory") - + override val defaultVmem = (memory.getOrElse(1000) * 2 / 1000) + "G" override val defaultThreads = 2 - + override def beforeCmd { if (!memory.isEmpty) memory = Option(memory.get * threads) } - + def cmdLine = required(executable) + - conditional(decomrpess, "-d") + - conditional(!decomrpess, "-z") + - optional("-p", threads, spaceSeparated=false) + - optional("-m", memory, spaceSeparated=false) + - required("-c", output) + - required(input) + conditional(decomrpess, "-d") + + conditional(!decomrpess, "-z") + + optional("-p", threads, spaceSeparated = false) + + optional("-m", memory, spaceSeparated = false) + + required("-c", output) + + required(input) } object Pbzip2 { - def apply(root:Configurable, input:File, output:File): Pbzip2 = { + def apply(root: Configurable, input: File, output: File): Pbzip2 = { val pbzip2 = new Pbzip2(root) pbzip2.input = input pbzip2.output = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala index 61d9ef545db396e5b7786603990f2878000bf39a..4e298ee6f5a2068413a20ff67655e2a21b5027e4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala @@ -2,19 +2,19 @@ package nl.lumc.sasc.biopet.function import java.io.FileOutputStream import java.io.File -import org.broadinstitute.gatk.utils.commandline.{Input} +import org.broadinstitute.gatk.utils.commandline.{ Input } import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import scala.collection.JavaConversions._ trait PythonCommandLineFunction extends BiopetCommandLineFunction { - @Input(doc="Python script", required=false) + @Input(doc = "Python script", required = false) var python_script: File = _ - - executable = config("exe", default="python", submodule="python") - - protected var python_script_name : String = _ - def setPythonScript(script:String) { setPythonScript(script,"") } - def setPythonScript(script:String, subpackage:String) { + + executable = config("exe", default = "python", submodule = "python") + + protected var python_script_name: String = _ + def setPythonScript(script: String) { setPythonScript(script, "") } + def setPythonScript(script: String, subpackage: String) { python_script_name = script python_script = new File(".queue/tmp/" + subpackage + python_script_name) if (!python_script.getParentFile.exists) python_script.getParentFile.mkdirs @@ -23,8 +23,8 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction { org.apache.commons.io.IOUtils.copy(is, os) os.close() } - - def getPythonCommand() : String = { + + def getPythonCommand(): String = { required(executable) + required(python_script) } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala index ccf69cf9830da0135f3675bfde9dc8eb5741a16d..a58be437c8e5e9e187196cae777fde0024cfbe1a 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala @@ -5,14 +5,14 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline._ import java.io.File -class Sha1sum(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Zipped file") +class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Zipped file") var input: File = _ - - @Output(doc="Unzipped file") + + @Output(doc = "Unzipped file") var output: File = _ - - executable = config("exe", default="sha1sum") - + + executable = config("exe", default = "sha1sum") + def cmdLine = required(executable) + required(input) + " > " + required(output) } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala index 9a806e400c94a03e3b9e75e1b1d418e7dd89ef01..e342f75a4a6831efb8f439d0dff5d0cc0c7d75b9 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala @@ -2,23 +2,23 @@ package nl.lumc.sasc.biopet.function import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class Zcat(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Zipped file") +class Zcat(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Zipped file") var input: File = _ - - @Output(doc="Unzipped file") + + @Output(doc = "Unzipped file") var output: File = _ - - executable = config("exe", default="zcat") - + + executable = config("exe", default = "zcat") + def cmdLine = required(executable) + required(input) + " > " + required(output) } object Zcat { - def apply(root:Configurable, input:File, output:File): Zcat = { + def apply(root: Configurable, input: File, output: File): Zcat = { val zcat = new Zcat(root) zcat.input = input zcat.output = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/Bedtools.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/Bedtools.scala index 486ff176197321c8790e6f31fffcd6bba54ad3ce..abe391d8384492d57816b35d660cb9951b5a2343 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/Bedtools.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/Bedtools.scala @@ -3,7 +3,7 @@ package nl.lumc.sasc.biopet.function.bedtools import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction abstract class Bedtools extends BiopetCommandLineFunction { - executable = config("exe", default="bedtools", submodule="bedtools") + executable = config("exe", default = "bedtools", submodule = "bedtools") override def versionCommand = executable + " --version" override val versionRegex = """bedtools (.*)""".r } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsCoverage.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsCoverage.scala index a6f57ea0edbb0fff73aa2cbcf4e91d4a1f21f3df..fb26be17e342ba54fd5a5b13184f4cbffe760633 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsCoverage.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsCoverage.scala @@ -1,38 +1,38 @@ package nl.lumc.sasc.biopet.function.bedtools import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File -class BedtoolsCoverage(val root:Configurable) extends Bedtools { - @Input(doc="Input file (bed/gff/vcf/bam)") +class BedtoolsCoverage(val root: Configurable) extends Bedtools { + @Input(doc = "Input file (bed/gff/vcf/bam)") var input: File = _ - - @Input(doc="Intersect file (bed/gff/vcf)") + + @Input(doc = "Intersect file (bed/gff/vcf)") var intersectFile: File = _ - - @Output(doc="output File") + + @Output(doc = "output File") var output: File = _ - - @Argument(doc="dept", required=false) + + @Argument(doc = "dept", required = false) var depth: Boolean = false - + var inputTag = "-a" - + override def beforeCmd { if (input.getName.endsWith(".bam")) inputTag = "-abam" } - - def cmdLine = required(executable) + required("coverage") + - required(inputTag, input) + - required("-b", intersectFile) + - conditional(depth, "-d") + + + def cmdLine = required(executable) + required("coverage") + + required(inputTag, input) + + required("-b", intersectFile) + + conditional(depth, "-d") + " > " + required(output) } object BedtoolsCoverage { - def apply(root:Configurable, input:File, intersect:File, output:File, - depth:Boolean = true) : BedtoolsCoverage = { + def apply(root: Configurable, input: File, intersect: File, output: File, + depth: Boolean = true): BedtoolsCoverage = { val bedtoolsCoverage = new BedtoolsCoverage(root) bedtoolsCoverage.input = input bedtoolsCoverage.intersectFile = intersect diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsIntersect.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsIntersect.scala index aba220327ea0ae53246d865a5d6b964ccfd6f324..8e0f7c0cafe45e09f91d21ddecba1c5edbb75918 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsIntersect.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/bedtools/BedtoolsIntersect.scala @@ -1,42 +1,42 @@ package nl.lumc.sasc.biopet.function.bedtools import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File -class BedtoolsIntersect(val root:Configurable) extends Bedtools { - @Input(doc="Input file (bed/gff/vcf/bam)") +class BedtoolsIntersect(val root: Configurable) extends Bedtools { + @Input(doc = "Input file (bed/gff/vcf/bam)") var input: File = _ - - @Input(doc="Intersect file (bed/gff/vcf)") + + @Input(doc = "Intersect file (bed/gff/vcf)") var intersectFile: File = _ - - @Output(doc="output File") + + @Output(doc = "output File") var output: File = _ - - @Argument(doc="Min overlap", required=false) + + @Argument(doc = "Min overlap", required = false) var minOverlap: Option[Double] = config("minoverlap") - - @Argument(doc="Only count", required=false) + + @Argument(doc = "Only count", required = false) var count: Boolean = false - + var inputTag = "-a" - + override def beforeCmd { if (input.getName.endsWith(".bam")) inputTag = "-abam" } - - def cmdLine = required(executable) + required("intersect") + - required(inputTag, input) + - required("-b", intersectFile) + - optional("-f", minOverlap) + - conditional(count, "-c") + + + def cmdLine = required(executable) + required("intersect") + + required(inputTag, input) + + required("-b", intersectFile) + + optional("-f", minOverlap) + + conditional(count, "-c") + " > " + required(output) } object BedtoolsIntersect { - def apply(root:Configurable, input:File, intersect:File, output:File, - minOverlap:Double = 0, count:Boolean = false) : BedtoolsIntersect = { + def apply(root: Configurable, input: File, intersect: File, output: File, + minOverlap: Double = 0, count: Boolean = false): BedtoolsIntersect = { val bedtoolsIntersect = new BedtoolsIntersect(root) bedtoolsIntersect.input = input bedtoolsIntersect.intersectFile = intersect diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CalculateHsMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CalculateHsMetrics.scala index feae3a989c762c7b76d37eab8689150ea7698b49..a1074791737db6e12805e40224a7c1f7f7ac2b13 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CalculateHsMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CalculateHsMetrics.scala @@ -2,48 +2,48 @@ package nl.lumc.sasc.biopet.function.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class CalculateHsMetrics(val root:Configurable) extends Picard { +class CalculateHsMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.directed.CalculateHsMetrics" - - @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) + + @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) var input: File = _ - - @Input(doc="BAIT_INTERVALS", required = true) + + @Input(doc = "BAIT_INTERVALS", required = true) var baitIntervals: File = _ - - @Input(doc="TARGET_INTERVALS", required = true) + + @Input(doc = "TARGET_INTERVALS", required = true) var targetIntervals: File = _ - - @Output(doc="The output file to write statistics to", required = true) + + @Output(doc = "The output file to write statistics to", required = true) var output: File = _ - - @Output(doc="PER_TARGET_COVERAGE", required = false) + + @Output(doc = "PER_TARGET_COVERAGE", required = false) var perTargetCoverage: File = _ - - @Argument(doc="Reference file", required = false) + + @Argument(doc = "Reference file", required = false) var reference: File = config("reference") - - @Argument(doc="METRIC_ACCUMULATION_LEVEL", required=false) + + @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false) var metricAccumulationLevel: List[String] = config("metricaccumulationlevel") - - @Argument(doc="BAIT_SET_NAME", required = false) + + @Argument(doc = "BAIT_SET_NAME", required = false) var baitSetName: String = _ - + override def commandLine = super.commandLine + - required("INPUT=", input, spaceSeparated=false) + - required("OUTPUT=", output, spaceSeparated=false) + - optional("REFERENCE_SEQUENCE=", reference, spaceSeparated=false) + - repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated=false) + - required("BAIT_INTERVALS=", baitIntervals, spaceSeparated=false) + - required("TARGET_INTERVALS=", targetIntervals, spaceSeparated=false) + - optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated=false) + - optional("BAIT_SET_NAME=", baitSetName, spaceSeparated=false) + required("INPUT=", input, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + + repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false) + + required("BAIT_INTERVALS=", baitIntervals, spaceSeparated = false) + + required("TARGET_INTERVALS=", targetIntervals, spaceSeparated = false) + + optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated = false) + + optional("BAIT_SET_NAME=", baitSetName, spaceSeparated = false) } object CalculateHsMetrics { - def apply(root:Configurable, input:File, baitIntervals:File, targetIntervals:File, outputDir:String) : CalculateHsMetrics = { + def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File, outputDir: String): CalculateHsMetrics = { val calculateHsMetrics = new CalculateHsMetrics(root) calculateHsMetrics.input = input calculateHsMetrics.baitIntervals = baitIntervals diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectAlignmentSummaryMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectAlignmentSummaryMetrics.scala index a8c5d335561456cc3d4892d77d80e71ded0d8880..013c7ef09bb1ca3b6bed7080fecefad6d8e1a955 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectAlignmentSummaryMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectAlignmentSummaryMetrics.scala @@ -2,52 +2,52 @@ package nl.lumc.sasc.biopet.function.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class CollectAlignmentSummaryMetrics(val root:Configurable) extends Picard { +class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.CollectAlignmentSummaryMetrics" - - @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) + + @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) var input: File = _ - - @Argument(doc="MAX_INSERT_SIZE", required = false) + + @Argument(doc = "MAX_INSERT_SIZE", required = false) var maxInstertSize: Option[Int] = config("maxInstertSize") - - @Argument(doc="ADAPTER_SEQUENCE", required = false) + + @Argument(doc = "ADAPTER_SEQUENCE", required = false) var adapterSequence: List[String] = config("adapterSequence") - - @Argument(doc="IS_BISULFITE_SEQUENCED", required = false) + + @Argument(doc = "IS_BISULFITE_SEQUENCED", required = false) var isBisulfiteSequenced: Option[Boolean] = config("isBisulfiteSequenced") - - @Output(doc="The output file to write statistics to", required = true) + + @Output(doc = "The output file to write statistics to", required = true) var output: File = _ - - @Argument(doc="Reference file", required = false) + + @Argument(doc = "Reference file", required = false) var reference: File = config("reference") - - @Argument(doc="ASSUME_SORTED", required = false) - var assumeSorted: Boolean = config("assumeSorted", default=true) - - @Argument(doc="METRIC_ACCUMULATION_LEVEL", required=false) + + @Argument(doc = "ASSUME_SORTED", required = false) + var assumeSorted: Boolean = config("assumeSorted", default = true) + + @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false) var metricAccumulationLevel: List[String] = config("metricaccumulationlevel") - - @Argument(doc="STOP_AFTER", required = false) + + @Argument(doc = "STOP_AFTER", required = false) var stopAfter: Option[Long] = config("stopAfter") - + override def commandLine = super.commandLine + - required("INPUT=", input, spaceSeparated=false) + - required("OUTPUT=", output, spaceSeparated=false) + - optional("REFERENCE_SEQUENCE=", reference, spaceSeparated=false) + - repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated=false) + - optional("MAX_INSERT_SIZE=", maxInstertSize, spaceSeparated=false) + - optional("IS_BISULFITE_SEQUENCED=", isBisulfiteSequenced, spaceSeparated=false) + - optional("ASSUME_SORTED=", assumeSorted, spaceSeparated=false) + - optional("STOP_AFTER=", stopAfter, spaceSeparated=false) + - repeat("ADAPTER_SEQUENCE=", adapterSequence, spaceSeparated=false) + required("INPUT=", input, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + + repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false) + + optional("MAX_INSERT_SIZE=", maxInstertSize, spaceSeparated = false) + + optional("IS_BISULFITE_SEQUENCED=", isBisulfiteSequenced, spaceSeparated = false) + + optional("ASSUME_SORTED=", assumeSorted, spaceSeparated = false) + + optional("STOP_AFTER=", stopAfter, spaceSeparated = false) + + repeat("ADAPTER_SEQUENCE=", adapterSequence, spaceSeparated = false) } object CollectAlignmentSummaryMetrics { - def apply(root:Configurable, input:File, outputDir:String) : CollectAlignmentSummaryMetrics = { + def apply(root: Configurable, input: File, outputDir: String): CollectAlignmentSummaryMetrics = { val collectAlignmentSummaryMetrics = new CollectAlignmentSummaryMetrics(root) collectAlignmentSummaryMetrics.input = input collectAlignmentSummaryMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".alignmentMetrics") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectGcBiasMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectGcBiasMetrics.scala index 3b81ff3061ab509adb21c0e46bced1845983b393..9ae67794f14262704ab3255ccd6f898c740dae90 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectGcBiasMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectGcBiasMetrics.scala @@ -2,57 +2,57 @@ package nl.lumc.sasc.biopet.function.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class CollectGcBiasMetrics(val root:Configurable) extends Picard { +class CollectGcBiasMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.CollectGcBiasMetrics" - @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) + @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) var input: Seq[File] = Nil - @Output(doc="The output file to write statistics to", required = true) + @Output(doc = "The output file to write statistics to", required = true) var output: File = _ - - @Output(doc="Output chart", required = false) + + @Output(doc = "Output chart", required = false) var outputChart: File = _ - - @Output(doc="Output summary", required = false) + + @Output(doc = "Output summary", required = false) var outputSummary: File = _ - @Argument(doc="Reference file", required = false) - var reference: File = config("reference", required=true) - - @Argument(doc="Window size", required = false) + @Argument(doc = "Reference file", required = false) + var reference: File = config("reference", required = true) + + @Argument(doc = "Window size", required = false) var windowSize: Option[Int] = config("windowsize") - - @Argument(doc="MINIMUM_GENOME_FRACTION", required=false) + + @Argument(doc = "MINIMUM_GENOME_FRACTION", required = false) var minGenomeFraction: Option[Double] = config("mingenomefraction") - - @Argument(doc="ASSUME_SORTED", required=false) - var assumeSorted: Boolean = config("assumesorted", default=true) - - @Argument(doc="IS_BISULFITE_SEQUENCED", required=false) + + @Argument(doc = "ASSUME_SORTED", required = false) + var assumeSorted: Boolean = config("assumesorted", default = true) + + @Argument(doc = "IS_BISULFITE_SEQUENCED", required = false) var isBisulfiteSequinced: Option[Boolean] = config("isbisulfitesequinced") - + override def afterGraph { if (outputChart == null) outputChart = new File(output + ".pdf") //require(reference.exists) } - + override def commandLine = super.commandLine + - repeat("INPUT=", input, spaceSeparated=false) + - required("OUTPUT=", output, spaceSeparated=false) + - optional("CHART_OUTPUT=", outputChart, spaceSeparated=false) + - required("REFERENCE_SEQUENCE=", reference, spaceSeparated=false) + - optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated=false) + - optional("WINDOW_SIZE=", windowSize, spaceSeparated=false) + - optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated=false) + + repeat("INPUT=", input, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + optional("CHART_OUTPUT=", outputChart, spaceSeparated = false) + + required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + + optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false) + + optional("WINDOW_SIZE=", windowSize, spaceSeparated = false) + + optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + conditional(isBisulfiteSequinced.getOrElse(false), "IS_BISULFITE_SEQUENCED=TRUE") } object CollectGcBiasMetrics { - def apply(root:Configurable, input:File, outputDir:String) : CollectGcBiasMetrics = { + def apply(root: Configurable, input: File, outputDir: String): CollectGcBiasMetrics = { val collectGcBiasMetrics = new CollectGcBiasMetrics(root) collectGcBiasMetrics.input :+= input collectGcBiasMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectInsertSizeMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectInsertSizeMetrics.scala index 6c369d3e84386c18ecc02a264f43959015e08219..7420e7fc6453a74c3dfcdea3a83f41af83583436 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectInsertSizeMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectInsertSizeMetrics.scala @@ -2,60 +2,60 @@ package nl.lumc.sasc.biopet.function.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class CollectInsertSizeMetrics(val root:Configurable) extends Picard { +class CollectInsertSizeMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.CollectInsertSizeMetrics" - @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) + @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) var input: File = _ - @Output(doc="The output file to write statistics to", required = true) + @Output(doc = "The output file to write statistics to", required = true) var output: File = _ - - @Output(doc="Output histogram", required = true) + + @Output(doc = "Output histogram", required = true) var outputHistogram: File = _ - - @Argument(doc="Reference file", required = false) + + @Argument(doc = "Reference file", required = false) var reference: File = config("reference") - - @Argument(doc="DEVIATIONS", required = false) + + @Argument(doc = "DEVIATIONS", required = false) var deviations: Option[Double] = config("deviations") - - @Argument(doc="MINIMUM_PCT", required=false) + + @Argument(doc = "MINIMUM_PCT", required = false) var minPct: Option[Double] = config("minpct") - - @Argument(doc="ASSUME_SORTED", required=false) - var assumeSorted: Boolean = config("assumesorted", default=true) - - @Argument(doc="STOP_AFTER", required=false) + + @Argument(doc = "ASSUME_SORTED", required = false) + var assumeSorted: Boolean = config("assumesorted", default = true) + + @Argument(doc = "STOP_AFTER", required = false) var stopAfter: Option[Long] = config("stopAfter") - - @Argument(doc="METRIC_ACCUMULATION_LEVEL", required=false) + + @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false) var metricAccumulationLevel: List[String] = config("metricaccumulationlevel") - - @Argument(doc="HISTOGRAM_WIDTH", required=false) + + @Argument(doc = "HISTOGRAM_WIDTH", required = false) var histogramWidth: Option[Int] = config("histogramWidth") - + override def afterGraph { if (outputHistogram == null) outputHistogram = new File(output + ".pdf") //require(reference.exists) } - + override def commandLine = super.commandLine + - required("INPUT=", input, spaceSeparated=false) + - required("OUTPUT=", output, spaceSeparated=false) + - optional("HISTOGRAM_FILE=", outputHistogram, spaceSeparated=false) + - required("REFERENCE_SEQUENCE=", reference, spaceSeparated=false) + - optional("DEVIATIONS=", deviations, spaceSeparated=false) + - repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated=false) + - optional("STOP_AFTER=", stopAfter, spaceSeparated=false) + - optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated=false) + + required("INPUT=", input, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + optional("HISTOGRAM_FILE=", outputHistogram, spaceSeparated = false) + + required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + + optional("DEVIATIONS=", deviations, spaceSeparated = false) + + repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false) + + optional("STOP_AFTER=", stopAfter, spaceSeparated = false) + + optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") } object CollectInsertSizeMetrics { - def apply(root:Configurable, input:File, outputDir:String) : CollectInsertSizeMetrics = { + def apply(root: Configurable, input: File, outputDir: String): CollectInsertSizeMetrics = { val collectInsertSizeMetrics = new CollectInsertSizeMetrics(root) collectInsertSizeMetrics.input = input collectInsertSizeMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".insertsizemetrics") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/MarkDuplicates.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/MarkDuplicates.scala index ef4341f4a0b91c05d9efbc7a9f92b1aca0a22fb3..5f0e6a75bf0612e437eac235555cb77a2c85e5b1 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/MarkDuplicates.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/MarkDuplicates.scala @@ -2,87 +2,87 @@ package nl.lumc.sasc.biopet.function.picard import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class MarkDuplicates(val root:Configurable) extends Picard { +class MarkDuplicates(val root: Configurable) extends Picard { javaMainClass = "picard.sam.MarkDuplicates" - - @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) + + @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true) var input: List[File] = Nil - @Output(doc="The output file to bam file to", required = true) + @Output(doc = "The output file to bam file to", required = true) var output: File = _ - - @Output(doc="The output file to write statistics to", required = true) + + @Output(doc = "The output file to write statistics to", required = true) var outputMetrics: File = _ - - @Argument(doc="PROGRAM_RECORD_ID", required=false) + + @Argument(doc = "PROGRAM_RECORD_ID", required = false) var programRecordId: String = config("programrecordid") - - @Argument(doc="PROGRAM_GROUP_VERSION", required=false) + + @Argument(doc = "PROGRAM_GROUP_VERSION", required = false) var programGroupVersion: String = config("programgroupversion") - - @Argument(doc="PROGRAM_GROUP_COMMAND_LINE", required=false) + + @Argument(doc = "PROGRAM_GROUP_COMMAND_LINE", required = false) var programGroupCommandLine: String = config("programgroupcommandline") - - @Argument(doc="PROGRAM_GROUP_NAME", required=false) + + @Argument(doc = "PROGRAM_GROUP_NAME", required = false) var programGroupName: String = config("programgroupname") - - @Argument(doc="COMMENT", required=false) + + @Argument(doc = "COMMENT", required = false) var comment: String = config("comment") - - @Argument(doc="REMOVE_DUPLICATES", required=false) - var removeDuplicates: Boolean = config("removeduplicates", default=false) - - @Argument(doc="ASSUME_SORTED", required=false) - var assumeSorted: Boolean = config("assumesorted", default=false) - - @Argument(doc="MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP", required=false) + + @Argument(doc = "REMOVE_DUPLICATES", required = false) + var removeDuplicates: Boolean = config("removeduplicates", default = false) + + @Argument(doc = "ASSUME_SORTED", required = false) + var assumeSorted: Boolean = config("assumesorted", default = false) + + @Argument(doc = "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP", required = false) var maxSequencesForDiskReadEndsMap: Option[Int] = config("maxSequencesForDiskReadEndsMap") - - @Argument(doc="MAX_FILE_HANDLES_FOR_READ_ENDS_MAP", required=false) + + @Argument(doc = "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP", required = false) var maxFileHandlesForReadEndsMap: Option[Int] = config("maxFileHandlesForReadEndsMap") - - @Argument(doc="SORTING_COLLECTION_SIZE_RATIO", required=false) + + @Argument(doc = "SORTING_COLLECTION_SIZE_RATIO", required = false) var sortingCollectionSizeRatio: Option[Double] = config("sortingCollectionSizeRatio") - - @Argument(doc="READ_NAME_REGEX", required=false) + + @Argument(doc = "READ_NAME_REGEX", required = false) var readNameRegex: String = config("readNameRegex") - - @Argument(doc="OPTICAL_DUPLICATE_PIXEL_DISTANCE", required=false) + + @Argument(doc = "OPTICAL_DUPLICATE_PIXEL_DISTANCE", required = false) var opticalDuplicatePixelDistance: Option[Int] = config("opticalDuplicatePixelDistance") - + override def commandLine = super.commandLine + - repeat("INPUT=", input, spaceSeparated=false) + - required("OUTPUT=", output, spaceSeparated=false) + - required("METRICS_FILE=", outputMetrics, spaceSeparated=false) + - optional("PROGRAM_RECORD_ID=", programRecordId, spaceSeparated=false) + - optional("PROGRAM_GROUP_VERSION=", programGroupVersion, spaceSeparated=false) + - optional("PROGRAM_GROUP_COMMAND_LINE=", programGroupCommandLine, spaceSeparated=false) + - optional("PROGRAM_GROUP_NAME=", programGroupName, spaceSeparated=false) + - optional("COMMENT=", comment, spaceSeparated=false) + - conditional(removeDuplicates, "REMOVE_DUPLICATES=TRUE") + - conditional(assumeSorted, "ASSUME_SORTED=TRUE") + - optional("MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=", maxSequencesForDiskReadEndsMap, spaceSeparated=false) + - optional("MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=", maxFileHandlesForReadEndsMap, spaceSeparated=false) + - optional("SORTING_COLLECTION_SIZE_RATIO=", sortingCollectionSizeRatio, spaceSeparated=false) + - optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated=false) + - optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated=false) + repeat("INPUT=", input, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + required("METRICS_FILE=", outputMetrics, spaceSeparated = false) + + optional("PROGRAM_RECORD_ID=", programRecordId, spaceSeparated = false) + + optional("PROGRAM_GROUP_VERSION=", programGroupVersion, spaceSeparated = false) + + optional("PROGRAM_GROUP_COMMAND_LINE=", programGroupCommandLine, spaceSeparated = false) + + optional("PROGRAM_GROUP_NAME=", programGroupName, spaceSeparated = false) + + optional("COMMENT=", comment, spaceSeparated = false) + + conditional(removeDuplicates, "REMOVE_DUPLICATES=TRUE") + + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + + optional("MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=", maxSequencesForDiskReadEndsMap, spaceSeparated = false) + + optional("MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=", maxFileHandlesForReadEndsMap, spaceSeparated = false) + + optional("SORTING_COLLECTION_SIZE_RATIO=", sortingCollectionSizeRatio, spaceSeparated = false) + + optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated = false) + + optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated = false) } - object MarkDuplicates { - def apply(root:Configurable, input:List[File], outputDir:String) : MarkDuplicates = { +object MarkDuplicates { + def apply(root: Configurable, input: List[File], outputDir: String): MarkDuplicates = { val markDuplicates = new MarkDuplicates(root) markDuplicates.input = input markDuplicates.output = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.bam") markDuplicates.outputMetrics = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.metrics") return markDuplicates } - - def apply(root:Configurable, input:List[File], output:File) : MarkDuplicates = { + + def apply(root: Configurable, input: List[File], output: File): MarkDuplicates = { val markDuplicates = new MarkDuplicates(root) markDuplicates.input = input markDuplicates.output = output markDuplicates.outputMetrics = new File(output.getParent, output.getName.stripSuffix(".bam") + ".metrics") return markDuplicates } - } \ No newline at end of file +} \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/Picard.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/Picard.scala index ab9b94fa5f1df3e1416b9f457be2160ddf9c5efa..8a04fc69f41a720405b472655461ec6946e7cb9d 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/Picard.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/Picard.scala @@ -1,44 +1,44 @@ package nl.lumc.sasc.biopet.function.picard import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Argument} +import org.broadinstitute.gatk.utils.commandline.{ Argument } trait Picard extends BiopetJavaCommandLineFunction { - @Argument(doc="VERBOSITY", required = false) - var verbosity: String = config("verbosity", submodule="picard") - - @Argument(doc="QUIET", required = false) - var quiet: Boolean = config("quiet", default=false, submodule="picard") - - @Argument(doc="VALIDATION_STRINGENCY", required = false) - var stringency: String = config("validationstringency", submodule="picard") - - @Argument(doc="COMPRESSION_LEVEL", required = false) - var compression: Int = config("compressionlevel", submodule="picard") - - @Argument(doc="MAX_RECORDS_IN_RAM", required = false) - var maxRecordsInRam: Int = config("maxrecordsinram", submodule="picard") - - @Argument(doc="CREATE_INDEX", required = false) - var createIndex: Boolean = config("createindex", default=true, submodule="picard") - - @Argument(doc="CREATE_MD5_FILE", required = false) - var createMd5: Boolean = config("createmd5", default=false, submodule="picard") - + @Argument(doc = "VERBOSITY", required = false) + var verbosity: String = config("verbosity", submodule = "picard") + + @Argument(doc = "QUIET", required = false) + var quiet: Boolean = config("quiet", default = false, submodule = "picard") + + @Argument(doc = "VALIDATION_STRINGENCY", required = false) + var stringency: String = config("validationstringency", submodule = "picard") + + @Argument(doc = "COMPRESSION_LEVEL", required = false) + var compression: Int = config("compressionlevel", submodule = "picard") + + @Argument(doc = "MAX_RECORDS_IN_RAM", required = false) + var maxRecordsInRam: Int = config("maxrecordsinram", submodule = "picard") + + @Argument(doc = "CREATE_INDEX", required = false) + var createIndex: Boolean = config("createindex", default = true, submodule = "picard") + + @Argument(doc = "CREATE_MD5_FILE", required = false) + var createMd5: Boolean = config("createmd5", default = false, submodule = "picard") + override def versionCommand = executable + " " + javaOpts + " " + javaExecutable + " -h" override val versionRegex = """Version: (.*)""".r - override val versionExitcode = List(0,1) - + override val versionExitcode = List(0, 1) + override val defaultVmem = "8G" memoryLimit = Option(5.0) - + override def commandLine = super.commandLine + required("TMP_DIR=" + jobTempDir) + - optional("VERBOSITY=", verbosity, spaceSeparated=false) + + optional("VERBOSITY=", verbosity, spaceSeparated = false) + conditional(quiet, "QUIET=TRUE") + - optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated=false) + - optional("COMPRESSION_LEVEL=", compression, spaceSeparated=false) + - optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated=false) + + optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false) + + optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false) + + optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false) + conditional(createIndex, "CREATE_INDEX=TRUE") + conditional(createMd5, "CREATE_MD5_FILE=TRUE") } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/Samtools.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/Samtools.scala index 1f281fe32097cc4e2190db5915eafda187c31403..15fe7fc3599dcb201b7119d78479c20f145ab92d 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/Samtools.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/Samtools.scala @@ -3,8 +3,8 @@ package nl.lumc.sasc.biopet.function.samtools import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction abstract class Samtools extends BiopetCommandLineFunction { - executable = config("exe", default="samtools", submodule="samtools") + executable = config("exe", default = "samtools", submodule = "samtools") override def versionCommand = executable override val versionRegex = """Version: (.*)""".r - override val versionExitcode = List(0,1) + override val versionExitcode = List(0, 1) } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/SamtoolsFlagstat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/SamtoolsFlagstat.scala index 53f1eb6e16aafc9f9a48afb216f76faf87a7b872..f4b91043b79b9413946a05292a93b688fc3a5c4c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/SamtoolsFlagstat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/samtools/SamtoolsFlagstat.scala @@ -1,36 +1,36 @@ package nl.lumc.sasc.biopet.function.samtools import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class SamtoolsFlagstat(val root:Configurable) extends Samtools { - @Input(doc="Bam File") +class SamtoolsFlagstat(val root: Configurable) extends Samtools { + @Input(doc = "Bam File") var input: File = _ - - @Output(doc="output File") + + @Output(doc = "output File") var output: File = _ - + def cmdLine = required(executable) + required("flagstat") + required(input) + " > " + required(output) } object SamtoolsFlagstat { - def apply(root:Configurable, input:File, output:File) : SamtoolsFlagstat = { + def apply(root: Configurable, input: File, output: File): SamtoolsFlagstat = { val flagstat = new SamtoolsFlagstat(root) flagstat.input = input flagstat.output = output return flagstat } - - def apply(root:Configurable, input:File, outputDir:String) : SamtoolsFlagstat = { + + def apply(root: Configurable, input: File, outputDir: String): SamtoolsFlagstat = { val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" val outputFile = new File(dir + swapExtension(input.getName)) return apply(root, input, outputFile) } - - def apply(root:Configurable, input:File) : SamtoolsFlagstat = { + + def apply(root: Configurable, input: File): SamtoolsFlagstat = { return apply(root, input, new File(swapExtension(input.getAbsolutePath))) } - - private def swapExtension(inputFile:String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".flagstat" + + private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".flagstat" } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala index 207ddde87ac3ed00e5934e575fe10a1680f1337d..267650200972873b2a2a6d9b3f2efde5a16f84b7 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala @@ -2,65 +2,65 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File import scala.io.Source._ import scala.sys.process._ -class Cutadapt(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Input fastq file") +class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Input fastq file") var fastq_input: File = _ - - @Input(doc="Fastq contams file", required=false) + + @Input(doc = "Fastq contams file", required = false) var contams_file: File = _ - - @Output(doc="Output fastq file") + + @Output(doc = "Output fastq file") var fastq_output: File = _ - - executable = config("exe", default="cutadapt") + + executable = config("exe", default = "cutadapt") override def versionCommand = executable + " --version" override val versionRegex = """(.*)""".r - - var default_clip_mode:String = config("default_clip_mode", default="3") + + var default_clip_mode: String = config("default_clip_mode", default = "3") var opt_adapter: Set[String] = Set() + config("adapter") var opt_anywhere: Set[String] = Set() + config("anywhere") var opt_front: Set[String] = Set() + config("front") - + var opt_discard: Boolean = config("discard") var opt_minimum_length: String = config("minimum_length", 1) var opt_maximum_length: String = config("maximum_length") - + override def beforeCmd() { getContamsFromFile } - + def cmdLine = { if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) { analysisName = getClass.getName required(executable) + - // options - repeat("-a", opt_adapter) + - repeat("-b", opt_anywhere) + - repeat("-g", opt_front) + - conditional(opt_discard, "--discard") + - optional("-m", opt_minimum_length) + - optional("-M", opt_maximum_length) + - // input / output - required(fastq_input) + - " > " + required(fastq_output) + // options + repeat("-a", opt_adapter) + + repeat("-b", opt_anywhere) + + repeat("-g", opt_front) + + conditional(opt_discard, "--discard") + + optional("-m", opt_minimum_length) + + optional("-M", opt_maximum_length) + + // input / output + required(fastq_input) + + " > " + required(fastq_output) } else { analysisName = getClass.getSimpleName + "-ln" - "ln -sf " + - required(fastq_input) + - required(fastq_output) + "ln -sf " + + required(fastq_input) + + required(fastq_output) } } - + def getContamsFromFile { if (contams_file != null) { if (contams_file.exists()) { for (line <- fromFile(contams_file).getLines) { - var s: String = line.substring(line.lastIndexOf("\t")+1, line.size) + var s: String = line.substring(line.lastIndexOf("\t") + 1, line.size) if (default_clip_mode == "3") opt_adapter += s else if (default_clip_mode == "5") opt_front += s else if (default_clip_mode == "both") opt_anywhere += s diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala index 75dea6551ca19b31949af24d665f09527c04fc54..7b87bbdc2b848696a7bf790181cd576b5786fffa 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala @@ -2,50 +2,50 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.config._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File import scala.sys.process._ -class Fastqc(val root:Configurable) extends BiopetCommandLineFunction { - - @Input(doc="Contaminants", required=false) +class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { + + @Input(doc = "Contaminants", required = false) var contaminants: File = _ - - @Input(doc="Fastq file", shortName="FQ") + + @Input(doc = "Fastq file", shortName = "FQ") var fastqfile: File = _ - - @Output(doc="Output", shortName="out") + + @Output(doc = "Output", shortName = "out") var output: File = _ - - executable = config("exe", default="fastqc") - var java_exe: String = config("exe", default="java", submodule="java") + + executable = config("exe", default = "fastqc") + var java_exe: String = config("exe", default = "java", submodule = "java") var kmers: Option[Int] = config("kmers") var quiet: Boolean = config("quiet") var noextract: Boolean = config("noextract") var nogroup: Boolean = config("nogroup") - + override val versionRegex = """FastQC (.*)""".r override val defaultThreads = 4 - + override def afterGraph { this.checkExecutable val fastqcDir = executable.substring(0, executable.lastIndexOf("/")) if (contaminants == null) contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt") } - + override def versionCommand = executable + " --version" - + def cmdLine = { - required(executable) + + required(executable) + optional("--java", java_exe) + - optional("--threads",threads) + - optional("--contaminants",contaminants) + - optional("--kmers",kmers) + + optional("--threads", threads) + + optional("--contaminants", contaminants) + + optional("--kmers", kmers) + conditional(nogroup, "--nogroup") + conditional(noextract, "--noextract") + conditional(quiet, "--quiet") + - required("-o",output.getParent()) + - required(fastqfile) + - required(" > ", output, escape=false) + required("-o", output.getParent()) + + required(fastqfile) + + required(" > ", output, escape = false) } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala index 66cbdd2b422ad166085011bf53630a94dfab14ed..40b5b9f310762bacc17dc531fe531a1967a5b372 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala @@ -2,71 +2,71 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.config._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File import scala.io.Source._ import scala.sys.process._ -class Sickle(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="R1 input") +class Sickle(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "R1 input") var input_R1: File = null - - @Input(doc="R2 input", required=false) + + @Input(doc = "R2 input", required = false) var input_R2: File = null - - @Input(doc="qualityType file", required=false) + + @Input(doc = "qualityType file", required = false) var qualityTypeFile: File = _ - - @Output(doc="R1 output") + + @Output(doc = "R1 output") var output_R1: File = null - - @Output(doc="R2 output", required=false) + + @Output(doc = "R2 output", required = false) var output_R2: File = null - - @Output(doc="singles output", required=false) + + @Output(doc = "singles output", required = false) var output_singles: File = null - - @Output(doc="stats output") + + @Output(doc = "stats output") var output_stats: File = null - - executable = config("exe", default="sickle") + + executable = config("exe", default = "sickle") var qualityType: String = config("qualitytype") - + var defaultQualityType: String = _ override val versionRegex = """sickle version (.*)""".r - + override def afterGraph { - if (defaultQualityType == null) defaultQualityType = config("defaultqualitytype", default="sanger") + if (defaultQualityType == null) defaultQualityType = config("defaultqualitytype", default = "sanger") if (qualityType == null && defaultQualityType != null) qualityType = defaultQualityType } - + override def versionCommand = executable + " --version" - + override def beforeCmd { qualityType = getQualityTypeFromFile } - + def cmdLine = { var cmd: String = required(executable) if (input_R2 != null) { cmd += required("pe") + - required("-r", input_R2) + - required("-p", output_R2) + - required("-s", output_singles) + required("-r", input_R2) + + required("-p", output_R2) + + required("-s", output_singles) } else cmd += required("se") - cmd + - required("-f", input_R1) + - required("-f", input_R1) + - required("-t", qualityType) + + cmd + + required("-f", input_R1) + + required("-f", input_R1) + + required("-t", qualityType) + required("-o", output_R1) + " > " + required(output_stats) } - + def getQualityTypeFromFile: String = { if (qualityType == null && qualityTypeFile != null) { if (qualityTypeFile.exists()) { for (line <- fromFile(qualityTypeFile).getLines) { - var s: String = line.substring(0,line.lastIndexOf("\t")) + var s: String = line.substring(0, line.lastIndexOf("\t")) return s } } else logger.warn("File : " + qualityTypeFile + " does not exist") diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 519035453157383c2f6923075d72e7d5cea4e7d5..222853ba928bb105d70852013fb819b46c40af9e 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -9,49 +9,49 @@ import org.broadinstitute.gatk.queue.extensions.gatk._ import org.broadinstitute.gatk.queue.extensions.picard._ import org.broadinstitute.gatk.queue.function._ import scala.util.parsing.json._ -import org.broadinstitute.gatk.utils.commandline.{Input, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } import nl.lumc.sasc.biopet.pipelines.flexiprep.scripts._ -class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { +class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - - @Input(doc="R1 fastq file (gzipped allowed)", shortName="R1",required=true) + + @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true) var input_R1: File = _ - - @Input(doc="R2 fastq file (gzipped allowed)", shortName="R2", required=false) + + @Input(doc = "R2 fastq file (gzipped allowed)", shortName = "R2", required = false) var input_R2: File = _ - - @Argument(doc="Skip Trim fastq files", shortName="skiptrim", required=false) + + @Argument(doc = "Skip Trim fastq files", shortName = "skiptrim", required = false) var skipTrim: Boolean = false - - @Argument(doc="Skip Clip fastq files", shortName="skipclip", required=false) + + @Argument(doc = "Skip Clip fastq files", shortName = "skipclip", required = false) var skipClip: Boolean = false - - @Argument(doc="Skip summary", shortName="skipsummary", required=false) + + @Argument(doc = "Skip summary", shortName = "skipsummary", required = false) var skipSummary: Boolean = false - + var paired: Boolean = (input_R2 != null) var R1_ext: String = _ var R2_ext: String = _ var R1_name: String = _ var R2_name: String = _ - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) - if (!skipTrim) skipTrim = config("skiptrim", default=false) - if (!skipClip) skipClip = config("skipclip", default=false) + if (!skipTrim) skipTrim = config("skiptrim", default = false) + if (!skipClip) skipClip = config("skipclip", default = false) if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep module") if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module") else if (!outputDir.endsWith("/")) outputDir += "/" paired = (input_R2 != null) - + if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz")) else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip")) else R1_name = input_R1.getName R1_ext = R1_name.substring(R1_name.lastIndexOf("."), R1_name.size) R1_name = R1_name.substring(0, R1_name.lastIndexOf(R1_ext)) - - if (paired) { + + if (paired) { if (input_R2.endsWith(".gz")) R2_name = input_R2.getName.substring(0, input_R2.getName.lastIndexOf(".gz")) else if (input_R2.endsWith(".gzip")) R2_name = input_R2.getName.substring(0, input_R2.getName.lastIndexOf(".gzip")) else R2_name = input_R2.getName @@ -59,48 +59,48 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { R2_name = R2_name.substring(0, R2_name.lastIndexOf(R2_ext)) } } - + def biopetScript() { runInitialJobs() - + if (paired) runTrimClip(outputFiles("fastq_input_R1"), outputFiles("fastq_input_R2"), outputDir) else runTrimClip(outputFiles("fastq_input_R1"), outputDir) - + runFinalize(List(outputFiles("output_R1")), if (outputFiles.contains("output_R2")) List(outputFiles("output_R2")) else List()) } - + def runInitialJobs() { - outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1,outputDir)) - if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2,outputDir)) - + outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1, outputDir)) + if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2, outputDir)) + addSeqstat(outputFiles("fastq_input_R1"), "seqstat_R1") if (paired) addSeqstat(outputFiles("fastq_input_R2"), "seqstat_R2") - + addSha1sum(outputFiles("fastq_input_R1"), "sha1_R1") if (paired) addSha1sum(outputFiles("fastq_input_R2"), "sha1_R2") - - var fastqc_R1 = runFastqc(input_R1,outputDir + "/" + R1_name + ".fastqc/") + + var fastqc_R1 = runFastqc(input_R1, outputDir + "/" + R1_name + ".fastqc/") outputFiles += ("fastqc_R1" -> fastqc_R1.output) outputFiles += ("qualtype_R1" -> getQualtype(fastqc_R1, R1_name)) outputFiles += ("contams_R1" -> getContams(fastqc_R1, R1_name)) - + if (paired) { - var fastqc_R2 = runFastqc(input_R2,outputDir + "/" + R2_name + ".fastqc/") + var fastqc_R2 = runFastqc(input_R2, outputDir + "/" + R2_name + ".fastqc/") outputFiles += ("fastqc_R2" -> fastqc_R2.output) outputFiles += ("qualtype_R2" -> getQualtype(fastqc_R2, R2_name)) outputFiles += ("contams_R2" -> getContams(fastqc_R2, R2_name)) } } - - def getQualtype(fastqc:Fastqc, pairname:String): File = { + + def getQualtype(fastqc: Fastqc, pairname: String): File = { val fastqcToQualtype = new FastqcToQualtype(this) fastqcToQualtype.fastqc_output = fastqc.output fastqcToQualtype.out = new File(outputDir + pairname + ".qualtype.txt") add(fastqcToQualtype) return fastqcToQualtype.out } - - def getContams(fastqc:Fastqc, pairname:String): File = { + + def getContams(fastqc: Fastqc, pairname: String): File = { val fastqcToContams = new FastqcToContams(this) fastqcToContams.fastqc_output = fastqc.output fastqcToContams.out = new File(outputDir + pairname + ".contams.txt") @@ -108,28 +108,28 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { add(fastqcToContams) return fastqcToContams.out } - - def runTrimClip(R1_in:File, outDir:String, chunk:String) { + + def runTrimClip(R1_in: File, outDir: String, chunk: String) { runTrimClip(R1_in, new File(""), outDir, chunk) } - def runTrimClip(R1_in:File, outDir:String) { + def runTrimClip(R1_in: File, outDir: String) { runTrimClip(R1_in, new File(""), outDir, "") } - def runTrimClip(R1_in:File, R2_in:File, outDir:String) { + def runTrimClip(R1_in: File, R2_in: File, outDir: String) { runTrimClip(R1_in, R2_in, outDir, "") } - def runTrimClip(R1_in:File, R2_in:File, outDir:String, chunkarg:String) : (File,File) = { + def runTrimClip(R1_in: File, R2_in: File, outDir: String, chunkarg: String): (File, File) = { val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_" - var results: Map[String,File] = Map() + var results: Map[String, File] = Map() var R1: File = new File(R1_in) var R2: File = new File(R2_in) - + if (!skipClip) { // Adapter clipping val cutadapt_R1 = new Cutadapt(this) if (!skipTrim || paired) cutadapt_R1.isIntermediate = true cutadapt_R1.fastq_input = R1 - cutadapt_R1.fastq_output = swapExt(outDir, R1, R1_ext, ".clip"+R1_ext) + cutadapt_R1.fastq_output = swapExt(outDir, R1, R1_ext, ".clip" + R1_ext) if (outputFiles.contains("contams_R1")) cutadapt_R1.contams_file = outputFiles("contams_R1") add(cutadapt_R1) R1 = cutadapt_R1.fastq_output @@ -137,7 +137,7 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { val cutadapt_R2 = new Cutadapt(this) if (!skipTrim || paired) cutadapt_R2.isIntermediate = true cutadapt_R2.fastq_input = R2 - cutadapt_R2.fastq_output = swapExt(outDir, R2, R2_ext, ".clip"+R2_ext) + cutadapt_R2.fastq_output = swapExt(outDir, R2, R2_ext, ".clip" + R2_ext) if (outputFiles.contains("contams_R2")) cutadapt_R2.contams_file = outputFiles("contams_R2") add(cutadapt_R2) R2 = cutadapt_R2.fastq_output @@ -147,8 +147,8 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { fastqSync.input_start_fastq = cutadapt_R1.fastq_input fastqSync.input_R1 = cutadapt_R1.fastq_output fastqSync.input_R2 = cutadapt_R2.fastq_output - fastqSync.output_R1 = swapExt(outDir, R1, R1_ext, ".sync"+R1_ext) - fastqSync.output_R2 = swapExt(outDir, R2, R2_ext, ".sync"+R2_ext) + fastqSync.output_R1 = swapExt(outDir, R1, R1_ext, ".sync" + R1_ext) + fastqSync.output_R2 = swapExt(outDir, R2, R2_ext, ".sync" + R2_ext) fastqSync.output_stats = swapExt(outDir, R1, R1_ext, ".sync.stats") add(fastqSync) outputFiles += ("syncStats" -> fastqSync.output_stats) @@ -156,19 +156,19 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { R2 = fastqSync.output_R2 } } - + if (!skipTrim) { // Quality trimming val sickle = new Sickle(this) sickle.input_R1 = R1 sickle.deps :+= outputFiles("fastq_input_R1") - sickle.output_R1 = swapExt(outDir, R1, R1_ext, ".trim"+R1_ext) + sickle.output_R1 = swapExt(outDir, R1, R1_ext, ".trim" + R1_ext) if (outputFiles.contains("qualtype_R1")) sickle.qualityTypeFile = outputFiles("qualtype_R1") if (!skipClip) sickle.deps :+= R1_in if (paired) { sickle.deps :+= outputFiles("fastq_input_R2") sickle.input_R2 = R2 - sickle.output_R2 = swapExt(outDir, R2, R2_ext, ".trim"+R2_ext) - sickle.output_singles = swapExt(outDir, R2, R2_ext, ".trim.singles"+R1_ext) + sickle.output_R2 = swapExt(outDir, R2, R2_ext, ".trim" + R2_ext) + sickle.output_singles = swapExt(outDir, R2, R2_ext, ".trim.singles" + R1_ext) if (!skipClip) sickle.deps :+= R2_in } sickle.output_stats = swapExt(outDir, R1, R1_ext, ".trim.stats") @@ -176,17 +176,17 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { R1 = sickle.output_R1 if (paired) R2 = sickle.output_R2 } - + outputFiles += (chunk + "output_R1" -> R1) if (paired) outputFiles += (chunk + "output_R2" -> R2) return (R1, R2) } - - def runFinalize(fastq_R1:List[File], fastq_R2:List[File]) { + + def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) { if (fastq_R1.length != fastq_R2.length && paired) throw new IllegalStateException("R1 and R2 file number is not the same") var R1: File = "" var R2: File = "" - if (fastq_R1.length == 1) { + if (fastq_R1.length == 1) { for (file <- fastq_R1) R1 = file for (file <- fastq_R2) R2 = file } else { @@ -199,8 +199,8 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { add(Cat(this, fastq_R2, R2), true) } } - - if (fastq_R1.length == 1 && !config("skip_native_link", default=false)) { + + if (fastq_R1.length == 1 && !config("skip_native_link", default = false)) { val lnR1 = new Ln(this) lnR1.in = R1 R1 = new File(outputDir + R1_name + ".qc" + R1_ext) @@ -214,23 +214,23 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { add(lnR2) } } - + outputFiles += ("output_R1" -> R1) if (paired) outputFiles += ("output_R2" -> R2) - + if (!skipTrim || !skipClip) { addSeqstat(R1, "seqstat_qc_R1") if (paired) addSeqstat(R2, "seqstat_qc_R2") - + addSha1sum(R1, "sha1_qc_R1") if (paired) addSha1sum(R2, "sha1_qc_R2") - - outputFiles += ("fastqc_R1_final" -> runFastqc(outputFiles("output_R1"),outputDir + "/" + R1_name + ".qc.fastqc/").output) - if (paired) outputFiles += ("fastqc_R2_final" -> runFastqc(outputFiles("output_R2"),outputDir + "/" + R2_name + ".qc.fastqc/").output) + + outputFiles += ("fastqc_R1_final" -> runFastqc(outputFiles("output_R1"), outputDir + "/" + R1_name + ".qc.fastqc/").output) + if (paired) outputFiles += ("fastqc_R2_final" -> runFastqc(outputFiles("output_R2"), outputDir + "/" + R2_name + ".qc.fastqc/").output) } - + if (!skipSummary) { - val summarize = new Summarize(this) + val summarize = new Summarize(this) summarize.runDir = outputDir summarize.samplea = R1_name if (paired) summarize.sampleb = R2_name @@ -238,43 +238,43 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { summarize.clip = !skipClip summarize.trim = !skipTrim summarize.out = new File(outputDir + R1_name + ".summary.json") - for ((k,v) <- outputFiles) summarize.deps +:= v + for ((k, v) <- outputFiles) summarize.deps +:= v add(summarize) } } - - def runFastqc(fastqfile:File, outDir:String) : Fastqc = { + + def runFastqc(fastqfile: File, outDir: String): Fastqc = { val fastqcCommand = new Fastqc(this) fastqcCommand.fastqfile = fastqfile var filename: String = fastqfile.getName() - if (filename.endsWith(".gz")) filename = filename.substring(0,filename.size - 3) - if (filename.endsWith(".gzip")) filename = filename.substring(0,filename.size - 5) - if (filename.endsWith(".fastq")) filename = filename.substring(0,filename.size - 6) + if (filename.endsWith(".gz")) filename = filename.substring(0, filename.size - 3) + if (filename.endsWith(".gzip")) filename = filename.substring(0, filename.size - 5) + if (filename.endsWith(".fastq")) filename = filename.substring(0, filename.size - 6) //if (filename.endsWith(".fq")) filename = filename.substring(0,filename.size - 3) fastqcCommand.output = outDir + "/" + filename + "_fastqc.output" fastqcCommand.afterGraph add(fastqcCommand) return fastqcCommand } - - def extractIfNeeded(file:File, runDir:String) : File = { + + def extractIfNeeded(file: File, runDir: String): File = { if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) { - var newFile: File = swapExt(runDir, file,".gz","") - if (file.getName().endsWith(".gzip")) newFile = swapExt(runDir, file,".gzip","") + var newFile: File = swapExt(runDir, file, ".gz", "") + if (file.getName().endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "") val zcatCommand = Zcat(this, file, newFile) zcatCommand.isIntermediate = true add(zcatCommand) return newFile } else if (file.getName().endsWith(".bz2")) { - var newFile = swapExt(runDir, file,".bz2","") - val pbzip2 = Pbzip2(this,file, newFile) + var newFile = swapExt(runDir, file, ".bz2", "") + val pbzip2 = Pbzip2(this, file, newFile) pbzip2.isIntermediate = true add(pbzip2) return newFile } else return file } - - def addSeqstat(fastq:File, key:String) { + + def addSeqstat(fastq: File, key: String) { val ext = fastq.getName.substring(fastq.getName.lastIndexOf(".")) val seqstat = new Seqstat(this) seqstat.input_fastq = fastq @@ -282,8 +282,8 @@ class Flexiprep(val root:Configurable) extends QScript with BiopetQScript { add(seqstat) outputFiles += (key -> seqstat.out) } - - def addSha1sum(fastq:File, key:String) { + + def addSha1sum(fastq: File, key: String) { val ext = fastq.getName.substring(fastq.getName.lastIndexOf(".")) val sha1sum = new Sha1sum(this) sha1sum.input = fastq diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala index e9d057fe86edb6b03b86609fa110f549be643228..56795a575533641089b4b0e8a8380a8f1ee2a025 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala @@ -2,31 +2,31 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class FastqSync(val root:Configurable) extends PythonCommandLineFunction { +class FastqSync(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("sync_paired_end_reads.py") - - @Input(doc="Start fastq") + + @Input(doc = "Start fastq") var input_start_fastq: File = _ - - @Input(doc="R1 input") + + @Input(doc = "R1 input") var input_R1: File = _ - - @Input(doc="R2 input") + + @Input(doc = "R2 input") var input_R2: File = _ - - @Output(doc="R1 output") + + @Output(doc = "R1 output") var output_R1: File = _ - - @Output(doc="R2 output") + + @Output(doc = "R2 output") var output_R2: File = _ - + //No output Annotation so file var output_stats: File = _ - - def cmdLine = getPythonCommand + + + def cmdLine = getPythonCommand + required(input_start_fastq) + required(input_R1) + required(input_R2) + diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala index c989860f0362c81787b959ddc8953bce47dff979..843e890f236447e1c50e9f0862f0e2f3e1056fa1 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala @@ -2,27 +2,27 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class FastqcToContams(val root:Configurable) extends PythonCommandLineFunction { +class FastqcToContams(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("__init__.py", "pyfastqc/") setPythonScript("fastqc_contam.py") - - @Input(doc="Fastqc output", shortName="fastqc", required=true) + + @Input(doc = "Fastqc output", shortName = "fastqc", required = true) var fastqc_output: File = _ - - @Input(doc="Contams input", shortName="fastqc", required=false) + + @Input(doc = "Contams input", shortName = "fastqc", required = false) var contams_file: File = _ - - @Output(doc="Output file", shortName="out", required=true) + + @Output(doc = "Output file", shortName = "out", required = true) var out: File = _ - + def cmdLine = { - getPythonCommand + - required(fastqc_output.getParent()) + - required("-c",contams_file) + - " > " + - required(out) + getPythonCommand + + required(fastqc_output.getParent()) + + required("-c", contams_file) + + " > " + + required(out) } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala index 9df07031c21549d1306127fde3106def3760ae56..f76cec1a94f3a7e347af5e1b5cef54850db40322 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala @@ -2,23 +2,23 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class FastqcToQualtype(val root:Configurable) extends PythonCommandLineFunction { +class FastqcToQualtype(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("__init__.py", "pyfastqc/") setPythonScript("qual_type_sickle.py") - - @Input(doc="Fastqc output", shortName="fastqc", required=true) + + @Input(doc = "Fastqc output", shortName = "fastqc", required = true) var fastqc_output: File = _ - - @Output(doc="Output file", shortName="out", required=true) + + @Output(doc = "Output file", shortName = "out", required = true) var out: File = _ - + def cmdLine = { getPythonCommand + - required(fastqc_output.getParent()) + - " > " + - required(out) + required(fastqc_output.getParent()) + + " > " + + required(out) } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala index bcad5318cf57a65a1b4772fa2114dedccf217c61..c5e03dbb3619b40160b4c779f3dcdfcc58b96d7e 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala @@ -2,25 +2,25 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class Seqstat(val root:Configurable) extends PythonCommandLineFunction { +class Seqstat(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("__init__.py", "pyfastqc/") setPythonScript("seq_stat.py") - - @Input(doc="Fastq input", shortName="fastqc", required=true) + + @Input(doc = "Fastq input", shortName = "fastqc", required = true) var input_fastq: File = _ - - @Output(doc="Output file", shortName="out", required=true) + + @Output(doc = "Output file", shortName = "out", required = true) var out: File = _ - + var fmt: String = _ - + def cmdLine = { - getPythonCommand + - optional("--fmt", fmt) + - required("-o", out) + - required(input_fastq) + getPythonCommand + + optional("--fmt", fmt) + + required("-o", out) + + required(input_fastq) } } diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala index 6b238ea894dbbf3ea4e759c9934192e832c84d6a..aa1db27aeca606af19fbce8d4f1ff4b11692a183 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala @@ -2,35 +2,35 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.function.PythonCommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{Input, Output} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class Summarize(val root:Configurable) extends PythonCommandLineFunction { +class Summarize(val root: Configurable) extends PythonCommandLineFunction { setPythonScript("__init__.py", "pyfastqc/") setPythonScript("summarize_flexiprep.py") - - @Output(doc="Output file", shortName="out", required=true) + + @Output(doc = "Output file", shortName = "out", required = true) var out: File = _ - + var samplea: String = _ var sampleb: String = _ var runDir: String = _ var samplename: String = _ var trim: Boolean = true var clip: Boolean = true - + def cmdLine = { var mode: String = "" if (clip) mode += "clip" if (trim) mode += "trim" if (mode.isEmpty) mode = "none" - - getPythonCommand + - optional("--run-dir", runDir) + - optional("--sampleb", sampleb) + - required(samplename) + - required(mode) + - required(samplea) + - required(out) + + getPythonCommand + + optional("--run-dir", runDir) + + optional("--sampleb", sampleb) + + required(samplename) + + required(mode) + + required(samplea) + + required(out) } } \ No newline at end of file diff --git a/gatk/gatk-benchmark-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala b/gatk/gatk-benchmark-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala index 2a67f18d95e69099ef55dd412a0cfed094347ed3..43e8de5c07430996e17721522682de7019796e2e 100644 --- a/gatk/gatk-benchmark-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ b/gatk/gatk-benchmark-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala @@ -1,29 +1,29 @@ package nl.lumc.sasc.biopet.pipelines.gatk -import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{Input, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } import scala.util.Random -class GatkBenchmarkGenotyping(val root:Configurable) extends QScript with BiopetQScript { +class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - - @Input(doc="Sample gvcf file") + + @Input(doc = "Sample gvcf file") var sampleGvcf: File = _ - - @Argument(doc="SampleName", required=true) + + @Argument(doc = "SampleName", required = true) var sampleName: String = _ - - @Input(doc="Gvcf files", shortName="I", required=false) + + @Input(doc = "Gvcf files", shortName = "I", required = false) var gvcfFiles: List[File] = Nil - - @Argument(doc="Reference", shortName="R", required=false) + + @Argument(doc = "Reference", shortName = "R", required = false) var reference: File = _ - - @Argument(doc="Dbsnp", shortName="dbsnp", required=false) + + @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) var dbsnp: File = _ - + def init() { if (configContains("gvcffiles")) for (file <- config("gvcffiles").getList) { gvcfFiles ::= file.toString @@ -33,12 +33,12 @@ class GatkBenchmarkGenotyping(val root:Configurable) extends QScript with Biopet if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { var todoGvcfs = gvcfFiles var gvcfPool: List[File] = Nil addGenotypingPipeline(gvcfPool) - + while (todoGvcfs.size > 0) { val index = Random.nextInt(todoGvcfs.size) gvcfPool ::= todoGvcfs(index) @@ -46,7 +46,7 @@ class GatkBenchmarkGenotyping(val root:Configurable) extends QScript with Biopet todoGvcfs = todoGvcfs.filter(b => b != todoGvcfs(index)) } } - + def addGenotypingPipeline(gvcfPool: List[File]) { val gatkGenotyping = new GatkGenotyping(this) gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool diff --git a/gatk/gatk-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala b/gatk/gatk-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala index 5655e2c9f45c9bbc817250ca32a1f395cc4ad128..0c29fe9a04ea0efa0bc08c9c2fa0aa27a382fc49 100644 --- a/gatk/gatk-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ b/gatk/gatk-genotyping/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala @@ -1,32 +1,32 @@ package nl.lumc.sasc.biopet.pipelines.gatk -import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.{CommandLineGATK, GenotypeGVCFs, SelectVariants} -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.queue.extensions.gatk.{ CommandLineGATK, GenotypeGVCFs, SelectVariants } +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class GatkGenotyping(val root:Configurable) extends QScript with BiopetQScript { +class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - - @Input(doc="Gvcf files", shortName="I") + + @Input(doc = "Gvcf files", shortName = "I") var inputGvcfs: List[File] = Nil - - @Argument(doc="Reference", shortName="R", required=false) + + @Argument(doc = "Reference", shortName = "R", required = false) var reference: File = _ - - @Argument(doc="Dbsnp", shortName="dbsnp", required=false) + + @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) var dbsnp: File = _ - - @Argument(doc="OutputName", required=false) + + @Argument(doc = "OutputName", required = false) var outputName: String = "genotype" - - @Output(doc="OutputFile", shortName="O", required=false) + + @Output(doc = "OutputFile", shortName = "O", required = false) var outputFile: File = _ - - @Argument(doc="Samples", shortName="sample", required=false) + + @Argument(doc = "Samples", shortName = "sample", required = false) var samples: List[String] = Nil - + def init() { if (reference == null) reference = config("reference") if (dbsnp == null) dbsnp = config("dbsnp") @@ -34,7 +34,7 @@ class GatkGenotyping(val root:Configurable) extends QScript with BiopetQScript { if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { addGenotypeGVCFs(inputGvcfs, outputFile) if (!samples.isEmpty) { @@ -42,39 +42,39 @@ class GatkGenotyping(val root:Configurable) extends QScript with BiopetQScript { for (sample <- samples) addSelectVariants(outputFile, List(sample), outputDir + "samples/", sample) } } - + trait gatkArguments extends CommandLineGATK { this.reference_sequence = reference this.memoryLimit = 2 this.jobResourceRequests :+= "h_vmem=4G" } - - def addGenotypeGVCFs(gvcfFiles: List[File], outputFile:File): File = { + + def addGenotypeGVCFs(gvcfFiles: List[File], outputFile: File): File = { val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments { this.variant = gvcfFiles if (configContains("dbsnp")) this.dbsnp = config("dbsnp") - if (configContains("scattercount", submodule="genotypegvcfs")) - this.scatterCount = config("scattercount", submodule="genotypegvcfs") + if (configContains("scattercount", submodule = "genotypegvcfs")) + this.scatterCount = config("scattercount", submodule = "genotypegvcfs") this.out = outputFile if (config("inputtype", "dna").getString == "rna") { - this.stand_call_conf = config("stand_call_conf", default=20, submodule="haplotypecaller") - this.stand_emit_conf = config("stand_emit_conf", default=20, submodule="haplotypecaller") + this.stand_call_conf = config("stand_call_conf", default = 20, submodule = "haplotypecaller") + this.stand_emit_conf = config("stand_emit_conf", default = 20, submodule = "haplotypecaller") } else { - this.stand_call_conf = config("stand_call_conf", default=30, submodule="haplotypecaller") - this.stand_emit_conf = config("stand_emit_conf", default=30, submodule="haplotypecaller") + this.stand_call_conf = config("stand_call_conf", default = 30, submodule = "haplotypecaller") + this.stand_emit_conf = config("stand_emit_conf", default = 30, submodule = "haplotypecaller") } } add(genotypeGVCFs) return genotypeGVCFs.out } - - def addSelectVariants(inputFile:File, samples:List[String], outputDir:String, name:String) { + + def addSelectVariants(inputFile: File, samples: List[String], outputDir: String, name: String) { val selectVariants = new SelectVariants with gatkArguments { this.variant = inputFile for (sample <- samples) this.sample_name :+= sample this.excludeNonVariants = true - if (configContains("scattercount", submodule="selectvariants")) - this.scatterCount = config("scattercount", submodule="selectvariants") + if (configContains("scattercount", submodule = "selectvariants")) + this.scatterCount = config("scattercount", submodule = "selectvariants") this.out = outputDir + name + ".vcf" } add(selectVariants) diff --git a/gatk/gatk-old/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala b/gatk/gatk-old/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala index f95f742f4d204f2cd6d96b591b1aba864aa1716e..27dd498f66a1982529fced0363a6f7bb916a6de5 100644 --- a/gatk/gatk-old/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala +++ b/gatk/gatk-old/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala @@ -13,20 +13,20 @@ import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.extensions.gatk._ import org.broadinstitute.gatk.queue.extensions.picard._ import org.broadinstitute.gatk.queue.function._ -import org.broadinstitute.gatk.utils.commandline.{Argument} +import org.broadinstitute.gatk.utils.commandline.{ Argument } import scala.util.parsing.json._ -class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { +class Gatk(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - - @Argument(doc="Only Sample",shortName="sample", required=false) + + @Argument(doc = "Only Sample", shortName = "sample", required = false) val onlySample: String = "" - + var referenceFile: File = _ var dbsnp: File = _ var gvcfFiles: List[File] = Nil var finalBamFiles: List[File] = Nil - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) referenceFile = config("referenceFile") @@ -35,11 +35,11 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { if (onlySample.isEmpty) { runSamplesJobs - + //SampleWide jobs if (gvcfFiles.size > 0) { var vcfFile = addGenotypeGVCFs(gvcfFiles, outputDir + "recalibration/") @@ -49,12 +49,12 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir + "recalibration/") } } else logger.warn("No gVCFs to genotype") - } else runSingleSampleJobs(onlySample) + } else runSingleSampleJobs(onlySample) } - + // Called for each sample - def runSingleSampleJobs(sampleConfig:Map[String,Any]) : Map[String,List[File]] = { - var outputFiles:Map[String,List[File]] = Map() + def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] = { + var outputFiles: Map[String, List[File]] = Map() var runBamfiles: List[File] = List() var sampleID: String = sampleConfig("ID").toString for ((run, runFiles) <- runRunsJobs(sampleConfig)) { @@ -62,13 +62,13 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { } var bamFile = runBamfiles.head if (runBamfiles.size > 1) { - bamFile = new File(outputDir + sampleID + "/" + sampleID + ".bam" ) + bamFile = new File(outputDir + sampleID + "/" + sampleID + ".bam") add(MarkDuplicates(this, runBamfiles, bamFile)) } outputFiles += ("FinalBams" -> runBamfiles) - + addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions) // Metrics pipeline - + if (runBamfiles.size > 0) { finalBamFiles ++= runBamfiles val gvcfFile = new File(outputDir + sampleID + "/" + sampleID + ".gvcf.vcf") @@ -78,10 +78,10 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { } else logger.warn("No bamfiles for variant calling for sample: " + sampleID) return outputFiles } - + // Called for each run from a sample - def runSingleRunJobs(runConfig:Map[String,Any], sampleConfig:Map[String,Any]) : Map[String,File] = { - var outputFiles:Map[String,File] = Map() + def runSingleRunJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] = { + var outputFiles: Map[String, File] = Map() val runID: String = runConfig("ID").toString val sampleID: String = sampleConfig("ID").toString val runDir: String = outputDir + sampleID + "/run_" + runID + "/" @@ -91,42 +91,42 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { if (runConfig.contains("R1")) { val mapping = Mapping.loadFromRunConfig(this, runConfig, sampleConfig, runDir) addAll(mapping.functions) // Add functions of mapping to curent function pool - - var bamFile:File = mapping.outputFiles("finalBamFile") - if (inputType == "rna") bamFile = addSplitNCigarReads(bamFile,runDir) - bamFile = addIndelRealign(bamFile,runDir) // Indel realigner - bamFile = addBaseRecalibrator(bamFile,runDir) // Base recalibrator - + + var bamFile: File = mapping.outputFiles("finalBamFile") + if (inputType == "rna") bamFile = addSplitNCigarReads(bamFile, runDir) + bamFile = addIndelRealign(bamFile, runDir) // Indel realigner + bamFile = addBaseRecalibrator(bamFile, runDir) // Base recalibrator + outputFiles += ("FinalBam" -> bamFile) - } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) + } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) return outputFiles } - - def addIndelRealign(inputBam:File, dir:String): File = { + + def addIndelRealign(inputBam: File, dir: String): File = { val realignerTargetCreator = new RealignerTargetCreator with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".realign.intervals") + this.o = swapExt(dir, inputBam, ".bam", ".realign.intervals") this.jobResourceRequests :+= "h_vmem=5G" if (configContains("scattercount", "realignertargetcreator")) this.scatterCount = config("scattercount", 1, "realignertargetcreator") } realignerTargetCreator.isIntermediate = true add(realignerTargetCreator) - + val indelRealigner = new IndelRealigner with gatkArguments { this.I :+= inputBam this.targetIntervals = realignerTargetCreator.o - this.o = swapExt(dir,inputBam,".bam",".realign.bam") + this.o = swapExt(dir, inputBam, ".bam", ".realign.bam") if (configContains("scattercount", "indelrealigner")) this.scatterCount = config("scattercount", 1, "indelrealigner") } add(indelRealigner) - + return indelRealigner.o } - - def addBaseRecalibrator(inputBam:File, dir:String): File = { + + def addBaseRecalibrator(inputBam: File, dir: String): File = { val baseRecalibrator = new BaseRecalibrator with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".baserecal") + this.o = swapExt(dir, inputBam, ".bam", ".baserecal") if (dbsnp != null) this.knownSites :+= dbsnp if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator") this.nct = 2 @@ -135,30 +135,30 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { val printReads = new PrintReads with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".baserecal.bam") + this.o = swapExt(dir, inputBam, ".bam", ".baserecal.bam") this.BQSR = baseRecalibrator.o if (configContains("scattercount", "printreads")) this.scatterCount = config("scattercount", 1, "printreads") } add(printReads) - + return printReads.o } - - def addSplitNCigarReads(inputBam:File, dir:String) : File = { + + def addSplitNCigarReads(inputBam: File, dir: String): File = { val splitNCigarReads = new SplitNCigarReads with gatkArguments { if (configContains("scattercount", "splitncigarreads")) this.scatterCount = config("scattercount", 1, "splitncigarreads") this.input_file = Seq(inputBam) - this.out = swapExt(dir,inputBam,".bam",".split.bam") + this.out = swapExt(dir, inputBam, ".bam", ".split.bam") this.read_filter :+= "ReassignMappingQuality" - + this.U = org.broadinstitute.gatk.engine.arguments.ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS } add(splitNCigarReads) - + return splitNCigarReads.out } - - def addHaplotypeCaller(bamfiles:List[File], outputfile:File): File = { + + def addHaplotypeCaller(bamfiles: List[File], outputfile: File): File = { val haplotypeCaller = new HaplotypeCaller with gatkArguments { if (configContains("scattercount", "haplotypecaller")) this.scatterCount = config("scattercount", 1, "haplotypecaller") this.input_file = bamfiles @@ -166,13 +166,13 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { if (configContains("dbsnp")) this.dbsnp = config("dbsnp") this.nct = 3 this.memoryLimit = this.nct * 2 - + // GVCF options - this.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF + this.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF this.variant_index_type = org.broadinstitute.gatk.utils.variant.GATKVCFIndexType.LINEAR this.variant_index_parameter = 128000 - - val inputType:String = config("inputtype", "dna") + + val inputType: String = config("inputtype", "dna") if (inputType == "rna") { this.dontUseSoftClippedBases = config("dontusesoftclippedbases", true, "haplotypecaller") this.recoverDanglingHeads = config("recoverdanglingheads", true, "haplotypecaller") @@ -186,45 +186,45 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { } } add(haplotypeCaller) - + return haplotypeCaller.out } - - def addSnpVariantRecalibrator(inputVcf:File, dir:String): File = { + + def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = { val snpVariantRecalibrator = getVariantRecalibrator("snp") snpVariantRecalibrator.input +:= inputVcf - snpVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".snp.recal") - snpVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".snp.tranches") + snpVariantRecalibrator.recal_file = swapExt(dir, inputVcf, ".vcf", ".snp.recal") + snpVariantRecalibrator.tranches_file = swapExt(dir, inputVcf, ".vcf", ".snp.tranches") add(snpVariantRecalibrator) val snpApplyRecalibration = getApplyRecalibration("snp") snpApplyRecalibration.input +:= inputVcf snpApplyRecalibration.recal_file = snpVariantRecalibrator.recal_file snpApplyRecalibration.tranches_file = snpVariantRecalibrator.tranches_file - snpApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".snp.recal.vcf") + snpApplyRecalibration.out = swapExt(dir, inputVcf, ".vcf", ".snp.recal.vcf") add(snpApplyRecalibration) - + return snpApplyRecalibration.out } - - def addIndelVariantRecalibrator(inputVcf:File, dir:String): File = { + + def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = { val indelVariantRecalibrator = getVariantRecalibrator("indel") indelVariantRecalibrator.input +:= inputVcf - indelVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".indel.recal") - indelVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".indel.tranches") + indelVariantRecalibrator.recal_file = swapExt(dir, inputVcf, ".vcf", ".indel.recal") + indelVariantRecalibrator.tranches_file = swapExt(dir, inputVcf, ".vcf", ".indel.tranches") add(indelVariantRecalibrator) val indelApplyRecalibration = getApplyRecalibration("indel") indelApplyRecalibration.input +:= inputVcf indelApplyRecalibration.recal_file = indelVariantRecalibrator.recal_file indelApplyRecalibration.tranches_file = indelVariantRecalibrator.tranches_file - indelApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".indel.recal.vcf") + indelApplyRecalibration.out = swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf") add(indelApplyRecalibration) - + return indelApplyRecalibration.out } - - def getVariantRecalibrator(mode_arg:String) : VariantRecalibrator = { + + def getVariantRecalibrator(mode_arg: String): VariantRecalibrator = { val variantRecalibrator = new VariantRecalibrator() with gatkArguments { if (mode_arg == "indel") { this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL @@ -238,14 +238,14 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { if (configContains("dbsnp", "variantrecalibrator")) this.resource :+= new TaggedFile(config("dbsnp", "", "variantrecalibrator").getString, "known=true,training=false,truth=false,prior=2.0") this.nt = 4 this.memoryLimit = nt * 2 - this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum") + this.an = Seq("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum") if (configContains("minnumbadvariants", "variantrecalibrator")) this.minNumBadVariants = config("minnumbadvariants", "", "variantrecalibrator") if (configContains("maxgaussians", "variantrecalibrator")) this.maxGaussians = config("maxgaussians", "", "variantrecalibrator") } return variantRecalibrator } - - def getApplyRecalibration(mode_arg:String) : ApplyRecalibration = { + + def getApplyRecalibration(mode_arg: String): ApplyRecalibration = { val applyRecalibration = new ApplyRecalibration() with gatkArguments { if (mode_arg == "indel") { this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL @@ -260,31 +260,31 @@ class Gatk(val root:Configurable) extends QScript with MultiSampleQScript { } return applyRecalibration } - - def addGenotypeGVCFs(gvcfFiles: List[File], dir:String): File = { + + def addGenotypeGVCFs(gvcfFiles: List[File], dir: String): File = { val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments { this.variant = gvcfFiles this.annotation ++= Seq("FisherStrand", "QualByDepth", "ChromosomeCounts") if (configContains("scattercount", "genotypegvcfs")) this.scatterCount = config("scattercount", 1, "genotypegvcfs") - this.out = new File(outputDir,"genotype.vcf") + this.out = new File(outputDir, "genotype.vcf") } add(genotypeGVCFs) return genotypeGVCFs.out } - - def addVariantAnnotator(inputvcf:File, bamfiles:List[File], dir:String): File = { + + def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = { val variantAnnotator = new VariantAnnotator with gatkArguments { this.variant = inputvcf this.input_file = bamfiles this.dbsnp = config("dbsnp", "variantannotator") - this.out = swapExt(dir, inputvcf,".vcf",".anotated.vcf") + this.out = swapExt(dir, inputvcf, ".vcf", ".anotated.vcf") if (configContains("scattercount", "variantannotator")) this.scatterCount = config("scattercount", 1, "variantannotator") } add(variantAnnotator) - + return variantAnnotator.out } - + trait gatkArguments extends CommandLineGATK { this.reference_sequence = referenceFile this.memoryLimit = 2 diff --git a/gatk/gatk-pipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/gatk/gatk-pipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index 90e54a7c09c9bb77a0b40227c27ea80afe0cc8d4..f3cc125f3294ec88cfb29766561668877eb37fca 100644 --- a/gatk/gatk-pipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/gatk/gatk-pipeline/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -10,47 +10,47 @@ import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.extensions.gatk._ import org.broadinstitute.gatk.queue.extensions.picard._ -import org.broadinstitute.gatk.utils.commandline.{Argument} +import org.broadinstitute.gatk.utils.commandline.{ Argument } -class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScript { +class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - - @Argument(doc="Only Sample",shortName="sample", required=false) + + @Argument(doc = "Only Sample", shortName = "sample", required = false) val onlySample: String = "" - - @Argument(doc="Skip Genotyping step",shortName="skipgenotyping", required=false) + + @Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false) var skipGenotyping: Boolean = false - - @Argument(doc="Merge gvcfs",shortName="mergegvcfs", required=false) + + @Argument(doc = "Merge gvcfs", shortName = "mergegvcfs", required = false) var mergeGvcfs: Boolean = false - + var referenceFile: File = _ var dbsnp: File = _ var gvcfFiles: List[File] = Nil var finalBamFiles: List[File] = Nil - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) - referenceFile = config("reference", required=true) + referenceFile = config("reference", required = true) dbsnp = config("dbsnp") - if (configContains("gvcfFiles")) - for (file <- config("gvcfFiles").getList) + if (configContains("gvcfFiles")) + for (file <- config("gvcfFiles").getList) gvcfFiles :+= file.toString if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { if (onlySample.isEmpty) { runSamplesJobs - + //SampleWide jobs if (mergeGvcfs && gvcfFiles.size > 0) { val newFile = outputDir + "merged.gvcf.vcf" addCombineGVCFs(gvcfFiles, newFile) gvcfFiles = List(newFile) } - + if (!skipGenotyping && gvcfFiles.size > 0) { val gatkGenotyping = new GatkGenotyping(this) gatkGenotyping.inputGvcfs = gvcfFiles @@ -59,28 +59,28 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip gatkGenotyping.biopetScript addAll(gatkGenotyping.functions) var vcfFile = gatkGenotyping.outputFile - - if (config("inputtype", default="dna").getString != "rna") { + + if (config("inputtype", default = "dna").getString != "rna") { vcfFile = addVariantAnnotator(vcfFile, finalBamFiles, outputDir + "recalibration/") vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir + "recalibration/") vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir + "recalibration/") } } else logger.warn("No gVCFs to genotype") - } else runSingleSampleJobs(onlySample) + } else runSingleSampleJobs(onlySample) } - + // Called for each sample - def runSingleSampleJobs(sampleConfig:Map[String,Any]) : Map[String,List[File]] = { - var outputFiles:Map[String,List[File]] = Map() + def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] = { + var outputFiles: Map[String, List[File]] = Map() var runBamfiles: List[File] = List() var sampleID: String = sampleConfig("ID").toString for ((run, runFiles) <- runRunsJobs(sampleConfig)) { runBamfiles +:= runFiles("FinalBam") } outputFiles += ("FinalBams" -> runBamfiles) - -// addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions) // Metrics pipeline - + + // addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions) // Metrics pipeline + if (runBamfiles.size > 0) { finalBamFiles ++= runBamfiles val gatkVariantcalling = new GatkVariantcalling(this) @@ -93,30 +93,30 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip } else logger.warn("No bamfiles for variant calling for sample: " + sampleID) return outputFiles } - + // Called for each run from a sample - def runSingleRunJobs(runConfig:Map[String,Any], sampleConfig:Map[String,Any]) : Map[String,File] = { - var outputFiles:Map[String,File] = Map() + def runSingleRunJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] = { + var outputFiles: Map[String, File] = Map() val runID: String = runConfig("ID").toString val sampleID: String = sampleConfig("ID").toString val runDir: String = outputDir + sampleID + "/run_" + runID + "/" var inputType = "" if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString - else inputType = config("inputtype", default="dna").toString + else inputType = config("inputtype", default = "dna").toString if (runConfig.contains("R1")) { val mapping = Mapping.loadFromRunConfig(this, runConfig, sampleConfig, runDir) addAll(mapping.functions) // Add functions of mapping to curent function pool - + outputFiles += ("FinalBam" -> mapping.outputFiles("finalBamFile")) - } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) + } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) return outputFiles } - - def addSnpVariantRecalibrator(inputVcf:File, dir:String): File = { + + def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = { val snpVariantRecalibrator = getVariantRecalibrator("snp") snpVariantRecalibrator.input +:= inputVcf - snpVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".snp.recal") - snpVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".snp.tranches") + snpVariantRecalibrator.recal_file = swapExt(dir, inputVcf, ".vcf", ".snp.recal") + snpVariantRecalibrator.tranches_file = swapExt(dir, inputVcf, ".vcf", ".snp.tranches") if (!snpVariantRecalibrator.resource.isEmpty) { add(snpVariantRecalibrator) @@ -124,7 +124,7 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip snpApplyRecalibration.input +:= inputVcf snpApplyRecalibration.recal_file = snpVariantRecalibrator.recal_file snpApplyRecalibration.tranches_file = snpVariantRecalibrator.tranches_file - snpApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".snp.recal.vcf") + snpApplyRecalibration.out = swapExt(dir, inputVcf, ".vcf", ".snp.recal.vcf") add(snpApplyRecalibration) return snpApplyRecalibration.out @@ -133,12 +133,12 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip return inputVcf } } - - def addIndelVariantRecalibrator(inputVcf:File, dir:String): File = { + + def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = { val indelVariantRecalibrator = getVariantRecalibrator("indel") indelVariantRecalibrator.input +:= inputVcf - indelVariantRecalibrator.recal_file = swapExt(dir, inputVcf,".vcf",".indel.recal") - indelVariantRecalibrator.tranches_file = swapExt(dir, inputVcf,".vcf",".indel.tranches") + indelVariantRecalibrator.recal_file = swapExt(dir, inputVcf, ".vcf", ".indel.recal") + indelVariantRecalibrator.tranches_file = swapExt(dir, inputVcf, ".vcf", ".indel.tranches") if (!indelVariantRecalibrator.resource.isEmpty) { add(indelVariantRecalibrator) @@ -146,7 +146,7 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip indelApplyRecalibration.input +:= inputVcf indelApplyRecalibration.recal_file = indelVariantRecalibrator.recal_file indelApplyRecalibration.tranches_file = indelVariantRecalibrator.tranches_file - indelApplyRecalibration.out = swapExt(dir, inputVcf,".vcf",".indel.recal.vcf") + indelApplyRecalibration.out = swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf") add(indelApplyRecalibration) return indelApplyRecalibration.out @@ -155,78 +155,78 @@ class GatkPipeline(val root:Configurable) extends QScript with MultiSampleQScrip return inputVcf } } - - def getVariantRecalibrator(mode_arg:String) : VariantRecalibrator = { + + def getVariantRecalibrator(mode_arg: String): VariantRecalibrator = { val variantRecalibrator = new VariantRecalibrator() with gatkArguments { if (mode_arg == "indel") { this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - if (configContains("mills", submodule="variantrecalibrator")) - this.resource :+= new TaggedFile(config("mills", submodule="variantrecalibrator").getString, "known=false,training=true,truth=true,prior=12.0") + if (configContains("mills", submodule = "variantrecalibrator")) + this.resource :+= new TaggedFile(config("mills", submodule = "variantrecalibrator").getString, "known=false,training=true,truth=true,prior=12.0") } else { // SNP this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - if (configContains("hapmap", submodule="variantrecalibrator")) - this.resource +:= new TaggedFile(config("hapmap", submodule="variantrecalibrator").getString, "known=false,training=true,truth=true,prior=15.0") - if (configContains("omni", submodule="variantrecalibrator")) - this.resource +:= new TaggedFile(config("omni", submodule="variantrecalibrator").getString, "known=false,training=true,truth=true,prior=12.0") - if (configContains("1000G", submodule="variantrecalibrator")) - this.resource +:= new TaggedFile(config("1000G", submodule="variantrecalibrator").getString, "known=false,training=true,truth=false,prior=10.0") + if (configContains("hapmap", submodule = "variantrecalibrator")) + this.resource +:= new TaggedFile(config("hapmap", submodule = "variantrecalibrator").getString, "known=false,training=true,truth=true,prior=15.0") + if (configContains("omni", submodule = "variantrecalibrator")) + this.resource +:= new TaggedFile(config("omni", submodule = "variantrecalibrator").getString, "known=false,training=true,truth=true,prior=12.0") + if (configContains("1000G", submodule = "variantrecalibrator")) + this.resource +:= new TaggedFile(config("1000G", submodule = "variantrecalibrator").getString, "known=false,training=true,truth=false,prior=10.0") } - if (configContains("dbsnp", submodule="variantrecalibrator")) - this.resource :+= new TaggedFile(config("dbsnp", submodule="variantrecalibrator").getString, "known=true,training=false,truth=false,prior=2.0") + if (configContains("dbsnp", submodule = "variantrecalibrator")) + this.resource :+= new TaggedFile(config("dbsnp", submodule = "variantrecalibrator").getString, "known=true,training=false,truth=false,prior=2.0") this.nt = 4 this.memoryLimit = nt * 2 - this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum") - if (configContains("minnumbadvariants", submodule="variantrecalibrator")) - this.minNumBadVariants = config("minnumbadvariants", submodule="variantrecalibrator") - if (configContains("maxgaussians", submodule="variantrecalibrator")) - this.maxGaussians = config("maxgaussians", submodule="variantrecalibrator") + this.an = Seq("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum") + if (configContains("minnumbadvariants", submodule = "variantrecalibrator")) + this.minNumBadVariants = config("minnumbadvariants", submodule = "variantrecalibrator") + if (configContains("maxgaussians", submodule = "variantrecalibrator")) + this.maxGaussians = config("maxgaussians", submodule = "variantrecalibrator") } return variantRecalibrator } - - def getApplyRecalibration(mode_arg:String) : ApplyRecalibration = { + + def getApplyRecalibration(mode_arg: String): ApplyRecalibration = { val applyRecalibration = new ApplyRecalibration() with gatkArguments { if (mode_arg == "indel") { this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - this.ts_filter_level = config("ts_filter_level", default=99.0, submodule="applyrecalibration") + this.ts_filter_level = config("ts_filter_level", default = 99.0, submodule = "applyrecalibration") } else { // SNP this.mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - this.ts_filter_level = config("ts_filter_level", default=99.5, submodule="applyrecalibration") + this.ts_filter_level = config("ts_filter_level", default = 99.5, submodule = "applyrecalibration") } this.nt = 3 this.memoryLimit = nt * 2 - if (configContains("scattercount", submodule="applyrecalibration")) - this.scatterCount = config("scattercount", submodule="applyrecalibration") + if (configContains("scattercount", submodule = "applyrecalibration")) + this.scatterCount = config("scattercount", submodule = "applyrecalibration") } return applyRecalibration } - - def addVariantAnnotator(inputvcf:File, bamfiles:List[File], dir:String): File = { + + def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = { val variantAnnotator = new VariantAnnotator with gatkArguments { this.variant = inputvcf this.input_file = bamfiles - this.dbsnp = config("dbsnp", submodule="variantannotator") - this.out = swapExt(dir, inputvcf,".vcf",".anotated.vcf") - if (configContains("scattercount", submodule="variantannotator")) - this.scatterCount = config("scattercount", submodule="variantannotator") + this.dbsnp = config("dbsnp", submodule = "variantannotator") + this.out = swapExt(dir, inputvcf, ".vcf", ".anotated.vcf") + if (configContains("scattercount", submodule = "variantannotator")) + this.scatterCount = config("scattercount", submodule = "variantannotator") } add(variantAnnotator) - + return variantAnnotator.out } - - def addCombineGVCFs(input:List[File], output:File): File = { + + def addCombineGVCFs(input: List[File], output: File): File = { val combineGVCFs = new CombineGVCFs with gatkArguments { this.variant = input this.o = output - if (configContains("scattercount", submodule="variantannotator")) - this.scatterCount = config("scattercount", submodule="combinegvcfs") + if (configContains("scattercount", submodule = "variantannotator")) + this.scatterCount = config("scattercount", submodule = "combinegvcfs") } add(combineGVCFs) - + return output } - + trait gatkArguments extends CommandLineGATK { this.reference_sequence = referenceFile this.memoryLimit = 2 diff --git a/gatk/gatk-variantcalling/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/gatk/gatk-variantcalling/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index d2f94bb54e31e157c1718b91fe70ed419ceb747d..e88a8422e3ed4b4fc5aa6ada5367405d28f4cc46 100644 --- a/gatk/gatk-variantcalling/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/gatk/gatk-variantcalling/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -4,42 +4,42 @@ import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.config._ import nl.lumc.sasc.biopet.function._ import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.{BaseRecalibrator, CommandLineGATK, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, GenotypeGVCFs, AnalyzeCovariates} +import org.broadinstitute.gatk.queue.extensions.gatk.{ BaseRecalibrator, CommandLineGATK, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, GenotypeGVCFs, AnalyzeCovariates } import org.broadinstitute.gatk.queue.function._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType -class GatkVariantcalling(val root:Configurable) extends QScript with BiopetQScript { +class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - - @Input(doc="Bam files (should be deduped bams)", shortName="BAM") + + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM") var inputBams: List[File] = Nil - - @Argument(doc="Reference", shortName="R", required=false) + + @Argument(doc = "Reference", shortName = "R", required = false) var reference: File = _ - - @Argument(doc="Dbsnp", shortName="dbsnp", required=false) + + @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) var dbsnp: File = _ - - @Argument(doc="OutputName", required=false) + + @Argument(doc = "OutputName", required = false) var outputName: String = "hc" - - @Output(doc="OutputFile", required=false) + + @Output(doc = "OutputFile", required = false) var outputFile: File = _ - + var gvcfMode = true var singleGenotyping = false - + def init() { - if (gvcfMode) gvcfMode = config("gvcfmode", default=true) + if (gvcfMode) gvcfMode = config("gvcfmode", default = true) if (!singleGenotyping) singleGenotyping = config("singlegenotyping") - if (reference == null) reference = config("reference", required=true) + if (reference == null) reference = config("reference", required = true) if (dbsnp == null) dbsnp = config("dbsnp") if (outputFile == null) outputFile = outputDir + outputName + (if (gvcfMode) ".gvcf.vcf" else ".vcf") if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { var bamFiles: List[File] = Nil for (inputBam <- inputBams) { @@ -49,75 +49,75 @@ class GatkVariantcalling(val root:Configurable) extends QScript with BiopetQScri addHaplotypeCaller(bamFiles, outputFile) if (gvcfMode && singleGenotyping) addGenotypeGVCFs(List(outputFile), outputDir) } - + trait gatkArguments extends CommandLineGATK { this.reference_sequence = reference this.memoryLimit = 2 this.jobResourceRequests :+= "h_vmem=4G" } - - def addIndelRealign(inputBam:File, dir:String): File = { + + def addIndelRealign(inputBam: File, dir: String): File = { val realignerTargetCreator = new RealignerTargetCreator with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".realign.intervals") + this.o = swapExt(dir, inputBam, ".bam", ".realign.intervals") this.jobResourceRequests :+= "h_vmem=5G" if (configContains("scattercount", "realignertargetcreator")) this.scatterCount = config("scattercount", 1, "realignertargetcreator") } realignerTargetCreator.isIntermediate = true add(realignerTargetCreator) - + val indelRealigner = new IndelRealigner with gatkArguments { this.I :+= inputBam this.targetIntervals = realignerTargetCreator.o - this.o = swapExt(dir,inputBam,".bam",".realign.bam") + this.o = swapExt(dir, inputBam, ".bam", ".realign.bam") if (configContains("scattercount", "indelrealigner")) this.scatterCount = config("scattercount", 1, "indelrealigner") } indelRealigner.isIntermediate = true add(indelRealigner) - + return indelRealigner.o } - - def addBaseRecalibrator(inputBam:File, dir:String): File = { + + def addBaseRecalibrator(inputBam: File, dir: String): File = { val baseRecalibrator = new BaseRecalibrator with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".baserecal") + this.o = swapExt(dir, inputBam, ".bam", ".baserecal") if (dbsnp != null) this.knownSites :+= dbsnp if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator") this.nct = config("threads", 1, "baserecalibrator") } add(baseRecalibrator) - + val baseRecalibratorAfter = new BaseRecalibrator with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".baserecal.after") + this.o = swapExt(dir, inputBam, ".bam", ".baserecal.after") this.BQSR = baseRecalibrator.o if (dbsnp != null) this.knownSites :+= dbsnp if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator") this.nct = config("threads", 1, "baserecalibrator") } add(baseRecalibratorAfter) - + val analyzeCovariates = new AnalyzeCovariates with gatkArguments { this.before = baseRecalibrator.o this.after = baseRecalibratorAfter.o - this.plots = swapExt(dir,inputBam,".bam",".baserecal.pdf") + this.plots = swapExt(dir, inputBam, ".bam", ".baserecal.pdf") } add(analyzeCovariates) - + val printReads = new PrintReads with gatkArguments { this.I :+= inputBam - this.o = swapExt(dir,inputBam,".bam",".baserecal.bam") + this.o = swapExt(dir, inputBam, ".bam", ".baserecal.bam") this.BQSR = baseRecalibrator.o if (configContains("scattercount", "printreads")) this.scatterCount = config("scattercount", 1, "printreads") } printReads.isIntermediate = true add(printReads) - + return printReads.o } - - def addHaplotypeCaller(bamfiles:List[File], outputfile:File): File = { + + def addHaplotypeCaller(bamfiles: List[File], outputfile: File): File = { val haplotypeCaller = new HaplotypeCaller with gatkArguments { this.min_mapping_quality_score = config("minMappingQualityScore", 20, "haplotypecaller") if (configContains("scattercount", "haplotypecaller")) this.scatterCount = config("scattercount", 1, "haplotypecaller") @@ -126,15 +126,15 @@ class GatkVariantcalling(val root:Configurable) extends QScript with BiopetQScri if (configContains("dbsnp")) this.dbsnp = config("dbsnp") this.nct = config("threads", 3, "haplotypecaller") this.memoryLimit = this.nct * 2 - + // GVCF options if (gvcfMode) { this.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF this.variant_index_type = GATKVCFIndexType.LINEAR this.variant_index_parameter = 128000 } - - val inputType:String = config("inputtype", "dna") + + val inputType: String = config("inputtype", "dna") if (inputType == "rna") { this.dontUseSoftClippedBases = config("dontusesoftclippedbases", true, "haplotypecaller") this.recoverDanglingHeads = config("recoverdanglingheads", true, "haplotypecaller") @@ -148,11 +148,11 @@ class GatkVariantcalling(val root:Configurable) extends QScript with BiopetQScri } } add(haplotypeCaller) - + return haplotypeCaller.out } - - def addGenotypeGVCFs(gvcfFiles: List[File], dir:String): File = { + + def addGenotypeGVCFs(gvcfFiles: List[File], dir: String): File = { val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments { this.variant = gvcfFiles this.annotation ++= Seq("FisherStrand", "QualByDepth", "ChromosomeCounts") diff --git a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala index b4ab0020607476bb8cf781dd898414f3aa9e5a12..c7e12143610333cdaf8d2d44139c6404dcb2d1da 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala @@ -2,44 +2,44 @@ package nl.lumc.sasc.biopet.function.aligners import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.config._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File import scala.sys.process._ -class Bwa(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="Fastq file R1", shortName="R1") +class Bwa(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "Fastq file R1", shortName = "R1") var R1: File = _ - - @Input(doc="Fastq file R2", shortName="R2", required=false) + + @Input(doc = "Fastq file R2", shortName = "R2", required = false) var R2: File = _ - - @Input(doc="The reference file for the bam files.", shortName="R") - var referenceFile: File = config("referenceFile", required=true) - - @Output(doc="Output file SAM", shortName="output") + + @Input(doc = "The reference file for the bam files.", shortName = "R") + var referenceFile: File = config("referenceFile", required = true) + + @Output(doc = "Output file SAM", shortName = "output") var output: File = _ - + var RG: String = _ - var M: Boolean = config("M", default=true) - - executable = config("exe", default="bwa") + var M: Boolean = config("M", default = true) + + executable = config("exe", default = "bwa") override val versionRegex = """Version: (.*)""".r - override val versionExitcode = List(0,1) - + override val versionExitcode = List(0, 1) + override val defaultVmem = "6G" override val defaultThreads = 8 - + override def versionCommand = executable - + def cmdLine = { - required(executable) + - required("mem") + - optional("-t", nCoresRequest) + - optional("-R", RG) + - conditional(M, "-M") + - required(referenceFile) + - required(R1) + - optional(R2) + - " > " + required(output) + required(executable) + + required("mem") + + optional("-t", nCoresRequest) + + optional("-R", RG) + + conditional(M, "-M") + + required(referenceFile) + + required(R1) + + optional(R2) + + " > " + required(output) } } diff --git a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala index cf116fbb390c16d1adc8b27ce75f5dadad003244..7c2bfb3ae0b8c19d020654fe6ecfae40cd01ae5a 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala @@ -2,55 +2,55 @@ package nl.lumc.sasc.biopet.function.aligners import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.config._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File import scala.sys.process._ -class Star(val root:Configurable) extends BiopetCommandLineFunction { - @Input(doc="The reference file for the bam files.", required=false) +class Star(val root: Configurable) extends BiopetCommandLineFunction { + @Input(doc = "The reference file for the bam files.", required = false) var referenceFile: File = new File(config("referenceFile")) - - @Input(doc="Fastq file R1", required=false) + + @Input(doc = "Fastq file R1", required = false) var R1: File = _ - - @Input(doc="Fastq file R2", required=false) + + @Input(doc = "Fastq file R2", required = false) var R2: File = _ - - @Output(doc="Output SAM file", required=false) + + @Output(doc = "Output SAM file", required = false) var outputSam: File = _ - - @Output(doc="Output tab file", required=false) + + @Output(doc = "Output tab file", required = false) var outputTab: File = _ - - @Input(doc="sjdbFileChrStartEnd file", required=false) + + @Input(doc = "sjdbFileChrStartEnd file", required = false) var sjdbFileChrStartEnd: File = _ - - @Output(doc="Output genome file", required=false) + + @Output(doc = "Output genome file", required = false) var outputGenome: File = _ - - @Output(doc="Output SA file", required=false) + + @Output(doc = "Output SA file", required = false) var outputSA: File = _ - - @Output(doc="Output SAindex file", required=false) + + @Output(doc = "Output SAindex file", required = false) var outputSAindex: File = _ - + executable = config("exe", "STAR") - - @Argument(doc="Output Directory") + + @Argument(doc = "Output Directory") var outputDir: String = _ - + var genomeDir: String = config("genomeDir", referenceFile.getParent + "/star/") var runmode: String = _ var sjdbOverhang: Int = _ var outFileNamePrefix: String = _ - + override val defaultVmem = "6G" override val defaultThreads = 8 - + override def afterGraph() { - if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix +="." + if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "." if (!outputDir.endsWith("/")) outputDir += "/" - val prefix = if (outFileNamePrefix != null) outputDir+outFileNamePrefix else outputDir + val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir if (runmode == null) { outputSam = new File(prefix + "Aligned.out.sam") outputTab = new File(prefix + "SJ.out.tab") @@ -62,9 +62,9 @@ class Star(val root:Configurable) extends BiopetCommandLineFunction { sjdbOverhang = config("sjdboverhang", 75) } } - - def cmdLine : String = { - var cmd: String = required("cd",outputDir) + "&&" + required(executable) + + def cmdLine: String = { + var cmd: String = required("cd", outputDir) + "&&" + required(executable) if (runmode != null && runmode == "genomeGenerate") { // Create index cmd += required("--runMode", runmode) + required("--genomeFastaFiles", referenceFile) @@ -76,13 +76,13 @@ class Star(val root:Configurable) extends BiopetCommandLineFunction { optional("--runThreadN", nCoresRequest) + optional("--outFileNamePrefix", outFileNamePrefix) if (sjdbOverhang > 0) cmd += optional("--sjdbOverhang", sjdbOverhang) - + return cmd } } object Star { - def apply(configurable:Configurable, R1:File, R2:File, outputDir:String, isIntermediate:Boolean = false): Star = { + def apply(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false): Star = { val star = new Star(configurable) star.R1 = R1 if (R2 != null) star.R2 = R2 @@ -91,25 +91,25 @@ object Star { star.afterGraph return star } - - def _2pass(configurable:Configurable, R1:File, R2:File, outputDir:String, isIntermediate:Boolean = false) : (File, List[Star]) = { + + def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false): (File, List[Star]) = { val outDir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, outDir + "aln-pass1/") starCommand_pass1.isIntermediate = isIntermediate starCommand_pass1.afterGraph - + val starCommand_reindex = new Star(configurable) starCommand_reindex.sjdbFileChrStartEnd = starCommand_pass1.outputTab - starCommand_reindex.outputDir = outDir + "re-index/" + starCommand_reindex.outputDir = outDir + "re-index/" starCommand_reindex.runmode = "genomeGenerate" starCommand_reindex.isIntermediate = isIntermediate starCommand_reindex.afterGraph - + val starCommand_pass2 = Star(configurable, R1, if (R2 != null) R2 else null, outDir + "aln-pass2/") starCommand_pass2.genomeDir = starCommand_reindex.outputDir starCommand_pass2.isIntermediate = isIntermediate starCommand_pass2.afterGraph - + return (starCommand_pass2.outputSam, List(starCommand_pass1, starCommand_reindex, starCommand_pass2)) } } \ No newline at end of file diff --git a/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 66f42689f3d6f2e0522f0336cf296fdb356216bb..bacd0f3692bffe65d969b5bf5f52aad0c94feca0 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -3,78 +3,78 @@ package nl.lumc.sasc.biopet.pipelines.mapping import nl.lumc.sasc.biopet.core.config.Configurable import java.io.File import java.util.Date -import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.apps.FastqSplitter -import nl.lumc.sasc.biopet.function.aligners.{Bwa, Star} +import nl.lumc.sasc.biopet.function.aligners.{ Bwa, Star } import nl.lumc.sasc.biopet.function.picard.MarkDuplicates import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.picard.{MergeSamFiles, SortSam, AddOrReplaceReadGroups} +import org.broadinstitute.gatk.queue.extensions.picard.{ MergeSamFiles, SortSam, AddOrReplaceReadGroups } import scala.math._ -class Mapping(val root:Configurable) extends QScript with BiopetQScript { +class Mapping(val root: Configurable) extends QScript with BiopetQScript { qscript => def this() = this(null) - - @Input(doc="R1 fastq file", shortName="R1",required=true) + + @Input(doc = "R1 fastq file", shortName = "R1", required = true) var input_R1: File = _ - - @Input(doc="R2 fastq file", shortName="R2", required=false) + + @Input(doc = "R2 fastq file", shortName = "R2", required = false) var input_R2: File = _ - - @Argument(doc="Output name", shortName="outputName", required=false) + + @Argument(doc = "Output name", shortName = "outputName", required = false) var outputName: String = _ - - @Argument(doc="Skip flexiprep", shortName="skipflexiprep", required=false) + + @Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false) var skipFlexiprep: Boolean = false - - @Argument(doc="Skip mark duplicates", shortName="skipmarkduplicates", required=false) + + @Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false) var skipMarkduplicates: Boolean = false - - @Argument(doc="Alginer", shortName="ALN", required=false) + + @Argument(doc = "Alginer", shortName = "ALN", required = false) var aligner: String = _ - - @Argument(doc="Reference", shortName="R", required=false) + + @Argument(doc = "Reference", shortName = "R", required = false) var referenceFile: File = _ - - @Argument(doc="auto chuning", shortName="chunking", required=false) + + @Argument(doc = "auto chuning", shortName = "chunking", required = false) var chunking: Boolean = false - + // Readgroup items - @Argument(doc="Readgroup ID", shortName="RGID", required=false) + @Argument(doc = "Readgroup ID", shortName = "RGID", required = false) var RGID: String = _ - - @Argument(doc="Readgroup Library", shortName="RGLB", required=false) + + @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false) var RGLB: String = _ - - @Argument(doc="Readgroup Platform", shortName="RGPL", required=false) + + @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false) var RGPL: String = _ - - @Argument(doc="Readgroup platform unit", shortName="RGPU", required=false) + + @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false) var RGPU: String = _ - - @Argument(doc="Readgroup sample", shortName="RGSM", required=false) + + @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false) var RGSM: String = _ - - @Argument(doc="Readgroup sequencing center", shortName="RGCN", required=false) + + @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false) var RGCN: String = _ - - @Argument(doc="Readgroup description", shortName="RGDS", required=false) + + @Argument(doc = "Readgroup description", shortName = "RGDS", required = false) var RGDS: String = _ - - @Argument(doc="Readgroup sequencing date", shortName="RGDT", required=false) + + @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false) var RGDT: Date = _ - - @Argument(doc="Readgroup predicted insert size", shortName="RGPI", required=false) + + @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false) var RGPI: Int = _ - + var paired: Boolean = false var numberChunks = 0 - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) - var inputtype:String = config("inputtype", "dna") + var inputtype: String = config("inputtype", "dna") if (aligner == null) { if (inputtype == "rna") aligner = config("aligner", "star-2pass") else aligner = config("aligner", "bwa") @@ -84,74 +84,75 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { else if (!outputDir.endsWith("/")) outputDir += "/" if (input_R1 == null) throw new IllegalStateException("Missing Fastq R1 on mapping module") paired = (input_R2 != null) - + if (RGLB == null && configContains("RGLB")) RGLB = config("RGLB") else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module") if (RGSM == null && configContains("RGSM")) RGSM = config("RGSM") else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module") if (RGID == null && configContains("RGID")) RGID = config("RGID") else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB - else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module") - + else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module") + if (RGPL == null) RGPL = config("RGPL", "illumina") if (RGPU == null) RGPU = config("RGPU", "na") if (RGCN == null && configContains("RGCN")) RGCN = config("RGCN") if (RGDS == null && configContains("RGDS")) RGDS = config("RGDS") - + if (outputName == null) outputName = RGID - + if (!chunking && numberChunks > 1) chunking = true if (!chunking) chunking = config("chunking", false) if (chunking) { - val chunkSize:Int = config("chunksize", (1 << 30)) - val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3 - else input_R1.length + val chunkSize: Int = config("chunksize", (1 << 30)) + val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3 + else input_R1.length if (numberChunks == 0 && configContains("numberchunks")) numberChunks = config("numberchunks") else if (numberChunks == 0) numberChunks = ceil(filesize.toDouble / chunkSize).toInt logger.debug("Chunks: " + numberChunks) } } - + def biopetScript() { var fastq_R1: File = input_R1 var fastq_R2: File = if (paired) input_R2 else "" val flexiprep = new Flexiprep(this) flexiprep.outputDir = outputDir + "flexiprep/" - var bamFiles:List[File] = Nil + var bamFiles: List[File] = Nil var fastq_R1_output: List[File] = Nil var fastq_R2_output: List[File] = Nil - - def removeGz(file:String):String = { + + def removeGz(file: String): String = { if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz")) else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip")) else return file } - var chunks:Map[String, (String,String)] = Map() + var chunks: Map[String, (String, String)] = Map() if (chunking) for (t <- 1 to numberChunks) { chunks += ("chunk_" + t -> (removeGz(outputDir + "chunk_" + t + "/" + fastq_R1.getName), - if (paired) removeGz(outputDir + "chunk_" + t + "/" + fastq_R2.getName) else "")) - } else chunks += ("flexiprep" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir), - flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir))) - + if (paired) removeGz(outputDir + "chunk_" + t + "/" + fastq_R2.getName) else "")) + } + else chunks += ("flexiprep" -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir), + flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir))) + if (chunking) { val fastSplitter_R1 = new FastqSplitter(this) fastSplitter_R1.input = fastq_R1 fastSplitter_R1.memoryLimit = 4 fastSplitter_R1.jobResourceRequests :+= "h_vmem=8G" - for ((chunk,fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1 + for ((chunk, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1 add(fastSplitter_R1) - + if (paired) { val fastSplitter_R2 = new FastqSplitter(this) fastSplitter_R2.input = fastq_R2 fastSplitter_R2.memoryLimit = 4 fastSplitter_R2.jobResourceRequests :+= "h_vmem=8G" - for ((chunk,fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2 + for ((chunk, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2 add(fastSplitter_R2) } } - - for ((chunk,fastqfile) <- chunks) { + + for ((chunk, fastqfile) <- chunks) { var R1 = fastqfile._1 var R2 = fastqfile._2 if (!skipFlexiprep) { @@ -176,7 +177,7 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { bwaCommand.RG = getReadGroup bwaCommand.output = new File(chunkDir + outputName + ".sam") add(bwaCommand, isIntermediate = true) - bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir,bwaCommand.output,".sam",".bam"), chunkDir) + bamFiles :+= addSortSam(List(bwaCommand.output), swapExt(chunkDir, bwaCommand.output, ".sam", ".bam"), chunkDir) } else if (aligner == "star") { val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true) add(starCommand) @@ -191,17 +192,17 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { flexiprep.runFinalize(fastq_R1_output, fastq_R2_output) addAll(flexiprep.functions) // Add function of flexiprep to curent function pool } - + var bamFile = bamFiles.head if (!skipMarkduplicates) { bamFile = new File(outputDir + outputName + ".dedup.bam") add(MarkDuplicates(this, bamFiles, bamFile)) } else if (skipMarkduplicates && chunking) bamFile = addMergeBam(bamFiles, new File(outputDir + outputName + ".bam"), outputDir) - + outputFiles += ("finalBamFile" -> bamFile) } - - def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = { + + def addSortSam(inputSam: List[File], outputFile: File, dir: String): File = { val sortSam = new SortSam sortSam.input = inputSam sortSam.createIndex = true @@ -211,11 +212,11 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { sortSam.jobResourceRequests :+= "h_vmem=4G" if (!skipMarkduplicates) sortSam.isIntermediate = true add(sortSam) - + return sortSam.output } - - def addMergeBam(inputSam:List[File], outputFile:File, dir:String) : File = { + + def addMergeBam(inputSam: List[File], outputFile: File, dir: String): File = { val mergeSam = new MergeSamFiles mergeSam.input = inputSam mergeSam.createIndex = true @@ -227,11 +228,11 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { mergeSam.jobResourceRequests :+= "h_vmem=4G" if (!skipMarkduplicates) mergeSam.isIntermediate = true add(mergeSam) - + return mergeSam.output } - - def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = { + + def addAddOrReplaceReadGroups(inputSam: List[File], outputFile: File, dir: String): File = { val addOrReplaceReadGroups = new AddOrReplaceReadGroups addOrReplaceReadGroups.input = inputSam addOrReplaceReadGroups.output = outputFile @@ -249,11 +250,11 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true add(addOrReplaceReadGroups) - + return addOrReplaceReadGroups.output } - - def getReadGroup() : String = { + + def getReadGroup(): String = { var RG: String = "@RG\\t" + "ID:" + RGID + "\\t" RG += "LB:" + RGLB + "\\t" RG += "PL:" + RGPL + "\\t" @@ -263,17 +264,17 @@ class Mapping(val root:Configurable) extends QScript with BiopetQScript { if (RGDS != null) RG += "DS" + RGDS + "\\t" if (RGDT != null) RG += "DT" + RGDT + "\\t" if (RGPI > 0) RG += "PI" + RGPI + "\\t" - + return RG.substring(0, RG.lastIndexOf("\\t")) } } object Mapping extends PipelineCommand { override val pipeline = "/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.class" - - def loadFromRunConfig(root:Configurable, runConfig:Map[String,Any], sampleConfig:Map[String,Any], runDir: String) : Mapping = { + + def loadFromRunConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String): Mapping = { val mapping = new Mapping(root) - + logger.debug("Mapping runconfig: " + runConfig) var inputType = "" if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString @@ -288,7 +289,7 @@ object Mapping extends PipelineCommand { if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString mapping.outputDir = runDir - + mapping.init mapping.biopetScript return mapping diff --git a/pipeline-template/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/PipelineTemplate.scala b/pipeline-template/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/PipelineTemplate.scala index 84689b03c333c69a46b6dbed144d691cf7aca21d..e76a17edb5ec386648276b71793c1d2b8f157fd1 100644 --- a/pipeline-template/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/PipelineTemplate.scala +++ b/pipeline-template/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/PipelineTemplate.scala @@ -5,15 +5,15 @@ import nl.lumc.sasc.biopet.core.config._ import nl.lumc.sasc.biopet.function._ import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.function._ -import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument} +import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } -class PipelineTemplate(val root:Configurable) extends QScript with BiopetQScript { +class PipelineTemplate(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) } - + def biopetScript() { } }