diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index 911a8349d502ad6bc63588a19acad7575d2fe181..503f67e308f1b9e7a9e613ce298159215a227621 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -77,7 +77,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } List(bamFile) } else { - val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) + val markDup = MarkDuplicates(this, files, new File(outputDir, outputName + ".dedup.bam")) markDup.isIntermediate = useIndelRealigner add(markDup) if (useIndelRealigner) { diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala index 79dfe02f8ad64c2c0cc0923e78d6df9651272438..60f9b10ff1aed9748e40716d16e5fd73537d5fa4 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala @@ -129,7 +129,7 @@ trait BastyTrait extends MultiSampleQScript { raxmlBoot.N = Some(1) raxmlBoot.n = outputName + "_boot_" + t add(raxmlBoot) - raxmlBoot.getBootstrapFile + raxmlBoot.getBootstrapFile.get } val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list")) @@ -138,7 +138,7 @@ trait BastyTrait extends MultiSampleQScript { val raxmlBi = new Raxml(this) raxmlBi.input = concensusVariants raxmlBi.t = raxmlMl.getBestTreeFile - raxmlBi.z = cat.output + raxmlBi.z = Some(cat.output) raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlBi.p = Some(seed) raxmlBi.f = "b" @@ -148,7 
+148,7 @@ trait BastyTrait extends MultiSampleQScript { val gubbins = new RunGubbins(this) gubbins.fastafile = concensusVariants - gubbins.startingTree = Some(raxmlBi.getBipartitionsFile) + gubbins.startingTree = raxmlBi.getBipartitionsFile gubbins.outputDirectory = dirSufixGubbins add(gubbins) } diff --git a/public/biopet-framework/nb-configuration.xml b/public/biopet-framework/nb-configuration.xml deleted file mode 100644 index bf0170b5b027d741082d582f5e56636c6e5e3605..0000000000000000000000000000000000000000 --- a/public/biopet-framework/nb-configuration.xml +++ /dev/null @@ -1,46 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Biopet is built on top of GATK Queue for building bioinformatic - pipelines. It is mainly intended to support LUMC SHARK cluster which is running - SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - should also be able to execute Biopet tools and pipelines. - - Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - - Contact us at: sasc@lumc.nl - - A dual licensing mode is applied. The source code within this project that are - not part of GATK Queue is freely available for non-commercial use under an AGPL - license; For commercial users or users who do not want to follow the AGPL - license, please contact us to obtain a separate license. - ---> -<project-shared-configuration> - <!-- -This file contains additional configuration written by modules in the NetBeans IDE. -The configuration is intended to be shared among all the users of project and -therefore it is assumed to be part of version control checkout. -Without this configuration present, some functionality in the IDE may be limited or fail altogether. 
---> - <config-data xmlns="http://www.netbeans.org/ns/maven-config-data/1"> - <configurations> - <configuration id="yamsvp" profiles=""/> - </configurations> - </config-data> - <properties xmlns="http://www.netbeans.org/ns/maven-properties-data/1"> - <!-- -Properties that influence various parts of the IDE, especially code formatting and the like. -You can copy and paste the single properties, into the pom.xml file and the IDE will pick them up. -That way multiple projects can share the same settings (useful for formatting rules for example). -Any value defined here will override the pom.xml file value but is only applicable to the current project. ---> - <netbeans.hint.license>apache20</netbeans.hint.license> - <com-junichi11-netbeans-changelf.enable>true</com-junichi11-netbeans-changelf.enable> - <com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project> - <com-junichi11-netbeans-changelf.lf-kind>LF</com-junichi11-netbeans-changelf.lf-kind> - <com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global> - <com-junichi11-netbeans-changelf.show-dialog>true</com-junichi11-netbeans-changelf.show-dialog> - <org-netbeans-modules-javascript2-requirejs.enabled>true</org-netbeans-modules-javascript2-requirejs.enabled> - </properties> -</project-shared-configuration> diff --git a/public/biopet-framework/nbactions-yamsvp.xml b/public/biopet-framework/nbactions-yamsvp.xml deleted file mode 100644 index 3eb4ab5ce9a85570326db29fef02b40dadd793b1..0000000000000000000000000000000000000000 --- a/public/biopet-framework/nbactions-yamsvp.xml +++ /dev/null @@ -1,67 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Biopet is built on top of GATK Queue for building bioinformatic - pipelines. It is mainly intended to support LUMC SHARK cluster which is running - SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - should also be able to execute Biopet tools and pipelines. 
- - Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - - Contact us at: sasc@lumc.nl - - A dual licensing mode is applied. The source code within this project that are - not part of GATK Queue is freely available for non-commercial use under an AGPL - license; For commercial users or users who do not want to follow the AGPL - license, please contact us to obtain a separate license. - ---> -<actions> - <action> - <actionName>run</actionName> - <preAction>build-with-dependencies</preAction> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args> - <exec.executable>java</exec.executable> - <exec.workingdir>../test</exec.workingdir> - </properties> - </action> - <action> - <actionName>debug</actionName> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args> - <exec.executable>java</exec.executable> - <jpda.listen>true</jpda.listen> - <exec.workingdir>../test</exec.workingdir> - </properties> - </action> - <action> - <actionName>profile</actionName> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args> 
- <exec.executable>java</exec.executable> - <exec.workingdir>../test</exec.workingdir> - </properties> - </action> - </actions> diff --git a/public/biopet-framework/nbactions.xml b/public/biopet-framework/nbactions.xml deleted file mode 100644 index cc2b2ad541a38f78e0eeae19913f68b8ce0f490b..0000000000000000000000000000000000000000 --- a/public/biopet-framework/nbactions.xml +++ /dev/null @@ -1,66 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - - Biopet is built on top of GATK Queue for building bioinformatic - pipelines. It is mainly intended to support LUMC SHARK cluster which is running - SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - should also be able to execute Biopet tools and pipelines. - - Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - - Contact us at: sasc@lumc.nl - - A dual licensing mode is applied. The source code within this project that are - not part of GATK Queue is freely available for non-commercial use under an AGPL - license; For commercial users or users who do not want to follow the AGPL - license, please contact us to obtain a separate license. 
- ---> -<actions> - <action> - <actionName>run</actionName> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args> - <exec.executable>java</exec.executable> - <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> - </properties> - </action> - <action> - <actionName>debug</actionName> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args> - <exec.executable>java</exec.executable> - <jpda.listen>true</jpda.listen> - <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> - </properties> - </action> - <action> - <actionName>profile</actionName> - <packagings> - <packaging>jar</packaging> - </packagings> - <goals> - <goal>process-classes</goal> - <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal> - </goals> - <properties> - <exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir 
/home/pjvan_thof/pipelines/test</exec.args> - <exec.executable>java</exec.executable> - <exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir> - </properties> - </action> - </actions> diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 115bca1b89aa8e6e7c41b48bce511e379de72a45..ba38670536bfc6e920513adf3aabb4ab247547f5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -25,9 +25,7 @@ import scala.util.matching.Regex import java.io.FileInputStream import java.security.MessageDigest -/** - * Biopet command line trait to auto check executable and cluster values - */ +/** Biopet command line trait to auto check executable and cluster values */ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable { analysisName = configName @@ -47,14 +45,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab */ protected[core] def beforeCmd {} - /** - * Can override this method. This is executed after the script is done en queue starts to generate the graph - */ + /** Can override this method. 
This is executed after the script is done and queue starts to generate the graph */ protected[core] def beforeGraph {} - /** - * Set default output file, threads and vmem for current job - */ + /** Set default output file, threads and vmem for current job */ override def freezeFieldValues() { preProcesExecutable beforeGraph @@ -118,9 +112,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab addJobReportBinding("md5sum_exe", md5.getOrElse("None")) } - /** - * executes checkExecutable method and fill job report - */ + /** executes checkExecutable method and fill job report */ final protected def preCmdInternal { preProcesExecutable @@ -133,10 +125,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab addJobReportBinding("version", getVersion) } - /** - * Command to get version of executable - * @return - */ + /** Command to get version of executable */ protected def versionCommand: String = null /** Regex to get version from version command output */ @@ -146,10 +135,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab protected val versionExitcode = List(0) /** Executes the version command */ - private def getVersionInternal: String = { - if (versionCommand == null || versionRegex == null) return "N/A" + private def getVersionInternal: Option[String] = { + if (versionCommand == null || versionRegex == null) return None val exe = new File(versionCommand.trim.split(" ")(0)) - if (!exe.exists()) return "N/A" + if (!exe.exists()) return None val stdout = new StringBuffer() val stderr = new StringBuffer() def outputLog = "Version command: \n" + versionCommand + @@ -158,25 +147,28 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n")) if (!versionExitcode.contains(process.exitValue)) { logger.warn("getVersion give exit code " + process.exitValue
+ ", version not found \n" + outputLog) - return "N/A" + return None } for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) { line match { - case versionRegex(m) => return m + case versionRegex(m) => return Some(m) case _ => } } logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog) - return "N/A" + return None } /** Get version from cache otherwise execute the version command */ - def getVersion: String = { + def getVersion: Option[String] = { if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) preProcesExecutable - if (!BiopetCommandLineFunctionTrait.versionCache.contains(executable)) - BiopetCommandLineFunctionTrait.versionCache += executable -> getVersionInternal - return BiopetCommandLineFunctionTrait.versionCache(executable) + if (!BiopetCommandLineFunctionTrait.versionCache.contains(versionCommand)) + getVersionInternal match { + case Some(version) => BiopetCommandLineFunctionTrait.versionCache += versionCommand -> version + case _ => + } + BiopetCommandLineFunctionTrait.versionCache.get(versionCommand) } /** @@ -205,9 +197,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab } } -/** - * stores global caches - */ +/** stores global caches */ object BiopetCommandLineFunctionTrait { import scala.collection.mutable.Map private val versionCache: Map[String, String] = Map() diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index eb84ecbca9ed817c85a6e5c6920c4d887bf934d6..2e8c4002fb7e95404f41522ef970c84de9b50f63 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -57,12 +57,12 @@ class WriteSummary(val root: Configurable) 
extends InProcessFunction with Config (for (f <- qscript.functions if f.isInstanceOf[BiopetCommandLineFunctionTrait]) yield { f match { case f: BiopetJavaCommandLineFunction => { - f.configName -> Map("version" -> f.getVersion, + f.configName -> Map("version" -> f.getVersion.getOrElse(None), "java_md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(f.executable, None), "jar_md5" -> SummaryQScript.md5sumCache.getOrElse(f.jarFile, None)) } case f: BiopetCommandLineFunction => { - f.configName -> Map("version" -> f.getVersion, + f.configName -> Map("version" -> f.getVersion.getOrElse(None), "md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(f.executable, None)) } case _ => throw new IllegalStateException("This should not be possible") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala index ac1d2a7a8d6e1fc0666d20b6ddbbd361f05cd27d..ef8fb9d6c1e78a7a22085bf08d06f38fb5f72de1 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala @@ -21,6 +21,11 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +/** + * Extension for bowtie 1 + * + * Based on version 1.1.1 + */ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Fastq file R1", shortName = "R1") var R1: File = null @@ -53,6 +58,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction { var strata: Boolean = config("strata", default = false) var maqerr: Option[Int] = config("maqerr") + /** return commandline to execute */ def cmdLine = { required(executable) + optional("--threads", nCoresRequest) + diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala index b6e496401257e9a259588f9092cfa089d608f980..2263b09db70b40902a802b9fbf315a6a6f317381 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala @@ -20,6 +20,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** + * Extension for GNU cat + */ class Cat(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input file", required = true) var input: List[File] = Nil @@ -29,10 +32,21 @@ class Cat(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "cat") + /** return commandline to execute */ def cmdLine = required(executable) + repeat(input) + " > " + required(output) } +/** + * Object for constructors for cat + */ object Cat { + /** + * Basis constructor + * @param root root object for config + * @param input list of files to use + * @param output output File + * @return + */ def apply(root: Configurable, input: List[File], output: File): Cat = { val cat = new Cat(root) cat.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index fc90139f8db4dba65da7e7c90df8988f68bb87e7..10aeca2920d79ccff132f6b1e86e6dc6b9e02818 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -24,6 +24,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable import scala.collection.mutable import scala.io.Source +/** + * Extension for cutadapt + * Based on version 1.5 + */
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable { @Input(doc = "Input fastq file") var fastq_input: File = _ @@ -39,17 +43,15 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su override val versionRegex = """(.*)""".r var default_clip_mode: String = config("default_clip_mode", default = "3") - var opt_adapter: Set[String] = Set() - if (config.contains("adapter")) for (adapter <- config("adapter").asList) opt_adapter += adapter.toString - var opt_anywhere: Set[String] = Set() - if (config.contains("anywhere")) for (adapter <- config("anywhere").asList) opt_anywhere += adapter.toString - var opt_front: Set[String] = Set() - if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString + var opt_adapter: Set[String] = config("adapter", default = Nil) + var opt_anywhere: Set[String] = config("anywhere", default = Nil) + var opt_front: Set[String] = config("front", default = Nil) var opt_discard: Boolean = config("discard", default = false) var opt_minimum_length: Int = config("minimum_length", 1) var opt_maximum_length: Option[Int] = config("maximum_length") + /** return commandline to execute */ def cmdLine = required(executable) + // options repeat("-a", opt_adapter) + @@ -63,6 +65,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su required("--output", fastq_output) + " > " + required(stats_output) + /** Output summary stats */ def summaryStats: Map[String, Any] = { val trimR = """.*Trimmed reads: *(\d*) .*""".r val tooShortR = """.*Too short reads: *(\d*) .*""".r @@ -89,6 +92,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su ) } + /** Merges values that can be merged for the summary */ override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { (v1, v2) match { case (v1: Int, v2: Int) => v1 + v2 diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala index 05867d694490ebe64f6d48588f72d4288227e904..2b599ffe28587c40b2b760c06ee910f1b51c0848 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala @@ -22,6 +22,10 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable +/** + * Extension for fastqc + * Based on version 0.10.1 and 0.11.2 + */ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Contaminants", required = false) @@ -48,6 +52,7 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable + " --version" override val defaultThreads = 4 + /** Sets contaminants and adapters when not yet set */ override def beforeGraph { this.preProcesExecutable @@ -59,8 +64,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { // otherwise, use default contaminants file (depending on FastQC version) case None => val defaultContams = getVersion match { - case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt") - case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") + case Some("v0.11.2") => new File(fastqcDir + "/Configuration/contaminant_list.txt") + case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") } config("contaminants", default = defaultContams) } @@ -71,13 +76,14 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { // otherwise, check if adapters are already present (depending on FastQC version) case None => val defaultAdapters = getVersion match { - case "v0.11.2" => Option(new File(fastqcDir + 
"/Configuration/adapter_list.txt")) - case _ => None + case Some("v0.11.2") => Option(new File(fastqcDir + "/Configuration/adapter_list.txt")) + case _ => None } defaultAdapters.collect { case adp => config("adapters", default = adp) } } } + /** return commandline to execute */ def cmdLine = required(executable) + optional("--java", java_exe) + optional("--threads", threads) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala index 6925a963f846c996baffac1ae0cea588cf2f7e79..d90a17b348d91bd474e90dcd91c7f58af77c31ad 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala @@ -21,6 +21,9 @@ import org.broadinstitute.gatk.queue.function.InProcessFunction import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import nl.lumc.sasc.biopet.core.config.Configurable +/** + * This class can execute ln as InProcessFunction or used to only generate the ln command + */ class Ln(val root: Configurable) extends InProcessFunction with Configurable { this.analysisName = getClass.getSimpleName @@ -35,12 +38,14 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable { var relative: Boolean = true + /** Generate out file for job */ override def freezeFieldValues(): Unit = { val outLog: String = ".%s.%s.out".format(out.getName, analysisName) jobOutputFile = new File(out.getAbsoluteFile.getParentFile, outLog) super.freezeFieldValues() } + /** return commandline to execute */ lazy val cmd: String = { lazy val inCanonical: String = { // need to remove "/~" to correctly expand path with tilde @@ -113,7 +118,17 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable { } } +/** Object for constructors for ln */ object Ln { + /** + * Basis constructor + * @param root root object for config + * 
@param input list of files to use + * @param output output File + * @param relative make reletive links (default true) + * @return + */ + def apply(root: Configurable, input: File, output: File, relative: Boolean = true): Ln = { val ln = new Ln(root) ln.in = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala index a266719a58af2345badfd5501261cb4473e9fa5f..fa177b8fe4cd52cfee1307dbb4e9aeeb5ded4e08 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala @@ -23,6 +23,7 @@ import argonaut._, Argonaut._ import scalaz._, Scalaz._ import scala.io.Source +/** Extension for md5sum */ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input") var input: File = _ @@ -32,17 +33,13 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "md5sum") + /** return commandline to execute */ def cmdLine = required(executable) + required(input) + " > " + required(output) - - def getSummary: Json = { - val data = Source.fromFile(output).mkString.split(" ") - return ("path" := output.getAbsolutePath) ->: - ("md5sum" := data(0)) ->: - jEmptyObject - } } +/** Object for constructors for md5sum */ object Md5sum { + /** Makes md5sum with md5 file in given dir */ def apply(root: Configurable, fastqfile: File, outDir: File): Md5sum = { val md5sum = new Md5sum(root) md5sum.input = fastqfile @@ -50,6 +47,7 @@ object Md5sum { return md5sum } + /** Makes md5sum with md5 file in same dir as input file */ def apply(root: Configurable, file: File): Md5sum = { val md5sum = new Md5sum(root) md5sum.input = file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala index 03f76abd6162f5759fb772158a5f1dc941012b1c..6756c5eafc7ab5b9a3ba1fd60fc89154bc0aa0df 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Pbzip2.scala @@ -20,6 +20,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for pbzip2 */ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Zipped file") var input: File = _ @@ -39,6 +40,7 @@ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction { if (!memory.isEmpty) memory = Option(memory.get * threads) } + /** return commandline to execute */ def cmdLine = required(executable) + conditional(decomrpess, "-d") + conditional(!decomrpess, "-z") + @@ -48,7 +50,9 @@ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction { required(input) } +/** Object for constructors for Pbzip2 */ object Pbzip2 { + /** Default constructor */ def apply(root: Configurable, input: File, output: File): Pbzip2 = { val pbzip2 = new Pbzip2(root) pbzip2.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala index ebfaac812bbedec10a14d0daad609d54469f9c03..cf088cb29fa57d66b1fb9fe24b247054e0ae2edc 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala @@ -28,6 +28,11 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction { executable = config("exe", default = "python", submodule = "python") protected var python_script_name: 
String = _ + + /** + * checks if script already exist in jar otherwise try to fetch from the jar + * @param script name / location of script + */ def setPythonScript(script: String) { python_script = new File(script) if (!python_script.exists()) { @@ -36,6 +41,12 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction { python_script_name = script } } + + /** + * Set and extract python script from jar file + * @param script name of script in jar + * @param subpackage location of script in jar + */ def setPythonScript(script: String, subpackage: String) { python_script_name = script python_script = new File(".queue/tmp/" + subpackage + python_script_name) @@ -46,6 +57,7 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction { os.close() } + /** return basic command to prefix the complete command with */ def getPythonCommand(): String = { required(executable) + required(python_script) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala index 12a36ad800a36d78d0237cc1358a1891e3a66675..ff7b56110c342c32b30e0af7c67cfc3118240eb5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala @@ -19,7 +19,12 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File +import scalaz.std.boolean.option +/** + * extension for raxml + * based on version 8.1.3 + */ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { override val defaultThreads = 1 @@ -50,49 +55,51 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { @Argument(doc = "Output directory", required = true) var w: File = null - var noBfgs: Boolean = 
config("no_bfgs", default = false) - @Input(required = false) - var t: File = _ + var t: Option[File] = _ @Input(required = false) - var z: File = _ + var z: Option[File] = _ @Output(doc = "Output files", required = false) private var out: List[File] = Nil + var noBfgs: Boolean = config("no_bfgs", default = false) + var executableNonThreads: String = config("exe", default = "raxmlHPC") var executableThreads: Option[String] = config("exe_pthreads") + /** Sets correct output files to job */ override def beforeGraph { require(w != null) if (threads == 0) threads = getThreads(defaultThreads) executable = if (threads > 1 && executableThreads.isDefined) executableThreads.get else executableNonThreads super.beforeGraph - out +:= getInfoFile + out :::= List(Some(getInfoFile), getBestTreeFile, getBootstrapFile, getBipartitionsFile).flatten f match { - case "d" if b.isEmpty => { - out +:= getBestTreeFile - for (t <- 0 until N.getOrElse(1)) { - out +:= new File(w, "RAxML_log." + n + ".RUN." + t) - out +:= new File(w, "RAxML_parsimonyTree." + n + ".RUN." + t) - out +:= new File(w, "RAxML_result." + n + ".RUN." + t) - } - } - case "d" if b.isDefined => out +:= getBootstrapFile - case "b" => { - out +:= new File(w, "RAxML_bipartitionsBranchLabels." + n) - out +:= new File(w, "RAxML_bipartitions." + n) + case "d" if b.isEmpty => for (t <- 0 until N.getOrElse(1)) { + out +:= new File(w, "RAxML_log." + n + ".RUN." + t) + out +:= new File(w, "RAxML_parsimonyTree." + n + ".RUN." + t) + out +:= new File(w, "RAxML_result." + n + ".RUN." + t) } - case _ => + case "b" => out +:= new File(w, "RAxML_bipartitionsBranchLabels." + n) + case _ => } } - def getBestTreeFile: File = new File(w, "RAxML_bestTree." + n) - def getBootstrapFile: File = new File(w, "RAxML_bootstrap." + n) - def getBipartitionsFile: File = new File(w, "RAxML_bipartitions." + n) - def getInfoFile: File = new File(w, "RAxML_info." 
+ n) + /** Returns bestTree file */ + def getBestTreeFile = option(f == "d" && b.isEmpty, new File(w, "RAxML_bestTree." + n)) + + /** Returns bootstrap file */ + def getBootstrapFile = option(f == "d" && b.isDefined, new File(w, "RAxML_bootstrap." + n)) + + /** Returns bipartitions file */ + def getBipartitionsFile = option(f == "b", new File(w, "RAxML_bipartitions." + n)) + + /** Returns info file */ + def getInfoFile = new File(w, "RAxML_info." + n) + /** return commandline to execute */ def cmdLine = required(executable) + required("-m", m) + required("-s", input) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala index 454572dab193e18d10056a516d41555e75bc87f6..df69543eaa4b4b4667786dc6b2008a85b28faf5e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala @@ -21,6 +21,11 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } +/** + * Extension for gubbins + * See; https://github.com/sanger-pathogens/gubbins + * No version known + */ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Contaminants", required = false) @@ -47,6 +52,7 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { var verbose: Boolean = config("verbose", default = false) var noCleanup: Boolean = config("no_cleanup", default = false) + /** Set correct output files */ override def beforeGraph: Unit = { super.beforeGraph require(outputDirectory != null) @@ -63,6 +69,7 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { for (t <- out) outputFiles ::= new File(outputDirectory + File.separator + 
prefix.getOrElse("gubbins") + t) } + /** Return command to execute */ def cmdLine = required("cd", outputDirectory) + " && " + required(executable) + optional("--outgroup", outgroup) + optional("--starting_tree", startingTree) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala index 44cb16ec0f2b1d8077b471fbd0f313d9ca9319ca..91eb8458df0cbd820b3d5bb7ecec9613882d2cf4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sha1sum.scala @@ -23,6 +23,7 @@ import argonaut._, Argonaut._ import scalaz._, Scalaz._ import scala.io.Source +/** Extension for sha1sum */ class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input file") var input: File = _ @@ -32,21 +33,16 @@ class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "sha1sum") + /** Set correct output files */ def cmdLine = required(executable) + required(input) + " > " + required(output) - - def getSummary: Json = { - val data = Source.fromFile(output).mkString.split(" ") - return ("path" := output.getAbsolutePath) ->: - ("sha1sum" := data(0)) ->: - jEmptyObject - } } object Sha1sum { - def apply(root: Configurable, fastqfile: File, outDir: String): Sha1sum = { + /** Create default sha1sum */ + def apply(root: Configurable, input: File, outDir: File): Sha1sum = { val sha1sum = new Sha1sum(root) - sha1sum.input = fastqfile - sha1sum.output = new File(outDir + fastqfile.getName + ".sha1") + sha1sum.input = input + sha1sum.output = new File(outDir, input.getName + ".sha1") return sha1sum } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala index 
fae8c5802c3979af507421df955a91400d21923d..2ff9e6947bc7366180aaa6236b78768eadcad039 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala @@ -25,6 +25,10 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.mutable import scala.io.Source +/** + * Extension for sickle + * Based on version 1.33 + */ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summarizable { @Input(doc = "R1 input") var input_R1: File = _ @@ -57,10 +61,12 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ override val versionRegex = """sickle version (.*)""".r override def versionCommand = executable + " --version" + /** Sets qualityType is still empty */ override def beforeGraph { if (qualityType.isEmpty) qualityType = Some(defaultQualityType) } + /** Return command to execute */ def cmdLine = { var cmd: String = required(executable) if (input_R2 != null) { @@ -81,6 +87,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ " > " + required(output_stats) } + /** returns stats map for summary */ def summaryStats: Map[String, Any] = { val pairKept = """FastQ paired records kept: (\d*) \((\d*) pairs\)""".r val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r @@ -108,6 +115,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ stats.toMap } + /** Merge stats incase of chunking */ override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { (v1, v2) match { case (v1: Int, v2: Int) => v1 + v2 diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala index bb412a8fbc23a41bf64b905e349b4cc0787c6145..a9cbbd6689ea26b7ab799131df6a858e35e5c571 
100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala @@ -21,6 +21,7 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +/** Extension for stampy */ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "FastQ file R1", shortName = "R1") var R1: File = _ @@ -68,12 +69,14 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable + " --help" + /** Sets readgroup when not set yet */ override def beforeGraph: Unit = { super.beforeGraph require(readgroup != null) } - def cmdLine: String = { + /** Returns command to execute */ + def cmdLine = { var cmd: String = required(executable) + optional("-t", nCoresRequest) + conditional(solexa, "--solexa") + @@ -99,6 +102,6 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { " -h " + required(hash) + " -o " + required(output) + " -M " + required(R1) + optional(R2) - return cmd + cmd } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala index 9404a895006398d7cd3cca35b3259846af1d6aa0..57f94660b47088756cc589bd8446cba389971106 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala @@ -21,6 +21,9 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } +/** + * Extension for STAR + */ class Star(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "The reference file 
for the bam files.", required = false) var reference: File = new File(config("reference")) @@ -62,6 +65,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { override val defaultVmem = "6G" override val defaultThreads = 8 + /** Sets output files for the graph */ override def beforeGraph() { if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "." val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir @@ -77,7 +81,8 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { } } - def cmdLine: String = { + /** Returns command to execute */ + def cmdLine = { var cmd: String = required("cd", outputDir) + "&&" + required(executable) if (runmode != null && runmode == "genomeGenerate") { // Create index cmd += required("--runMode", runmode) + @@ -91,11 +96,22 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { optional("--outFileNamePrefix", outFileNamePrefix) if (sjdbOverhang > 0) cmd += optional("--sjdbOverhang", sjdbOverhang) - return cmd + cmd } } object Star { + /** + * Create default star + * @param configurable root object + * @param R1 R1 fastq file + * @param R2 R2 fastq file + * @param outputDir Outputdir for Star + * @param isIntermediate + * @param deps Deps to add to wait on run + * @return Return Star + * + */ def apply(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = { val star = new Star(configurable) star.R1 = R1 @@ -107,7 +123,22 @@ object Star { return star } - def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): (File, List[Star]) = { + /** + * returns Star with 2pass star method + * @param configurable root object + * @param R1 R1 fastq file + * @param R2 R2 fastq file + * @param outputDir Outputdir for Star + * @param isIntermediate + * @param deps Deps to add to 
wait on run + * @return Return Star + */ + def _2pass(configurable: Configurable, + R1: File, + R2: File, + outputDir: File, + isIntermediate: Boolean = false, + deps: List[File] = Nil): (File, List[Star]) = { val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, new File(outputDir, "aln-pass1")) starCommand_pass1.isIntermediate = isIntermediate starCommand_pass1.deps = deps diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala index ce86a1ed9bd1e8d1afa04867887be2f21cf6231b..169c3274c9d68948db70111ae0aa3cf9091f0a96 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala @@ -21,6 +21,9 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } +/** + * Extension for TopHad + */ class TopHat(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "FastQ file R1", shortName = "R1") var R1: File = _ @@ -70,11 +73,10 @@ class TopHat(val root: Configurable) extends BiopetCommandLineFunction { } def cmdLine: String = { - var cmd: String = required(executable) + + required(executable) + optional("-p", nCoresRequest) + "--no-convert-bam" + required(bowtie_index) + required(R1) + optional(R2) - return cmd } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index 1ece3133f711a4b261a6c7586d76567da10d15d7..7e6c53fb7fda7a693334920af1954f9947294d5f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -7,6 +7,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } /** + * Extension for VariantEffectPredictor * Created by ahbbollen on 15-1-15. */ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction { @@ -134,6 +135,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu } } + /** Returns command to execute */ def cmdLine = required(executable) + required(vep_script) + required("-i", input) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala index 9c98893c9a4f22216cbcc57041bf7ca65b50be7e..5fcea15723224c7d3da5e3c78193d5ad4de32f75 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala @@ -27,6 +27,7 @@ class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction { var clip: Boolean = config("clip", default = false) var unc: Boolean = config("unc", default = false) + /** Returns command to execute */ def cmdLine = required(executable) + optional("-blockSize=", blockSize, spaceSeparated = false) + optional("-itemsPerSlot=", itemsPerSlot, spaceSeparated = false) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala index 7b1aeb0770be29991d791b07db5b937372d9e565..e8fa8cd7cbddf8714975386fb0d7a426c24144bc 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala @@ -20,6 +20,7 @@ import 
nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for zcat */ class Zcat(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Zipped file") var input: File = _ @@ -29,10 +30,12 @@ class Zcat(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "zcat") + /** Returns command to execute */ def cmdLine = required(executable) + required(input) + " > " + required(output) } object Zcat { + /** Returns a default zcat */ def apply(root: Configurable, input: File, output: File): Zcat = { val zcat = new Zcat(root) zcat.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala index cf7f16e381093eb0e4a547434b4e119936cd44b6..d9aa1dd948077509e73494b09046f7cd377f3398 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/Bedtools.scala @@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.bedtools import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +/** General abstract class for bedtools extensions */ abstract class Bedtools extends BiopetCommandLineFunction { override def subPath = "bedtools" :: super.subPath executable = config("exe", default = "bedtools", submodule = "bedtools") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala index 37266ccb1155726772fe2cca51280b4dbd273f5c..73ddfc8037cd1c44b865ebd6afd63aa202cb6872 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File +/** Extension for bedtools coverage */ class BedtoolsCoverage(val root: Configurable) extends Bedtools { @Input(doc = "Input file (bed/gff/vcf/bam)") var input: File = _ @@ -44,6 +45,7 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools { if (input.getName.endsWith(".bam")) inputTag = "-abam" } + /** Returns command to execute */ def cmdLine = required(executable) + required("coverage") + required(inputTag, input) + required("-b", intersectFile) + @@ -54,6 +56,7 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools { } object BedtoolsCoverage { + /** Returns defaul bedtools coverage */ def apply(root: Configurable, input: File, intersect: File, output: File, depth: Boolean = true, sameStrand: Boolean = false, diffStrand: Boolean = false): BedtoolsCoverage = { val bedtoolsCoverage = new BedtoolsCoverage(root) @@ -63,6 +66,6 @@ object BedtoolsCoverage { bedtoolsCoverage.depth = depth bedtoolsCoverage.sameStrand = sameStrand bedtoolsCoverage.diffStrand = diffStrand - return bedtoolsCoverage + bedtoolsCoverage } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala index 19071be64a67068f7c734245cffc68008b891a3e..8465efca792455dbeaeea1a0c74ef29fcc727380 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import 
org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File +/** Extension for bedtools intersect */ class BedtoolsIntersect(val root: Configurable) extends Bedtools { @Input(doc = "Input file (bed/gff/vcf/bam)") var input: File = _ @@ -41,6 +42,7 @@ class BedtoolsIntersect(val root: Configurable) extends Bedtools { if (input.getName.endsWith(".bam")) inputTag = "-abam" } + /** Returns command to execute */ def cmdLine = required(executable) + required("intersect") + required(inputTag, input) + required("-b", intersectFile) + @@ -50,6 +52,7 @@ class BedtoolsIntersect(val root: Configurable) extends Bedtools { } object BedtoolsIntersect { + /** Returns default bedtools intersect */ def apply(root: Configurable, input: File, intersect: File, output: File, minOverlap: Double = 0, count: Boolean = false): BedtoolsIntersect = { val bedtoolsIntersect = new BedtoolsIntersect(root) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala index 3e0ffc8011a1669b3205157cce5bde5d3d5053f1..35299b3ef40682a5841ef3a1f0bf325b41d10c12 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/Bwa.scala @@ -3,6 +3,8 @@ package nl.lumc.sasc.biopet.extensions.bwa import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction /** + * General bwa extension + * * Created by pjvan_thof on 1/16/15. 
*/ abstract class Bwa extends BiopetCommandLineFunction { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala index 3298702fcfd5cd98e1eaf0bae8f6b059d353d2c9..945411d77b32364002d278df27d0154c3fed47ef 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaAln.scala @@ -6,6 +6,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } /** + * Extension for bwa aln + * + * Based on version 0.7.12-r1039 + * * Created by pjvan_thof on 1/16/15. */ class BwaAln(val root: Configurable) extends Bwa { @@ -44,6 +48,7 @@ class BwaAln(val root: Configurable) extends Bwa { override val defaultVmem = "5G" override val defaultThreads = 8 + /** Returns command to execute */ def cmdLine = required(executable) + required("aln") + optional("-n", n) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala index 731cdc2787edebf111cdc703393ad40937cbb032..3eb1cd45055183ad4174de09bc2f457f08c93c3c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala @@ -22,6 +22,11 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +/** + * Extension for bwa mem + * + * Based on version 0.7.12-r1039 + */ class BwaMem(val root: Configurable) extends Bwa { @Input(doc = "Fastq file R1", shortName = "R1") var R1: File = _ diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala index 255811561e732b229b1417c95d866fad4694c0c9..7e190e59309ae3901e90aa34c6e54cd101be933b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala @@ -10,7 +10,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input } * * based on executable version 0.7.10-r789 * - * @param root Configurable */ class BwaSampe(val root: Configurable) extends Bwa { @Input(doc = "Fastq file R1", required = true) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala index 8bbf918474e21d2e2837ce01d5bc4490e13bd087..b302425d9a1c7e72fdea9c69fa8417f0b7885c6c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSamse.scala @@ -6,7 +6,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } /** - * Created by pjvan_thof on 1/16/15. 
+ * BWA samse wrapper + * + * based on executable version 0.7.10-r789 + * */ class BwaSamse(val root: Configurable) extends Bwa { @Input(doc = "Fastq file", required = true) @@ -24,6 +27,7 @@ class BwaSamse(val root: Configurable) extends Bwa { var n: Option[Int] = config("n") var r: String = _ + /** Returns command to execute */ def cmdLine = required(executable) + required("samse") + optional("-n", n) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala index aff6ee16cdfbf1a77529d079bc1789c9ffcc494b..c232c03f8674c1eb600c53dd689ac2055df6e16b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVTools.scala @@ -6,6 +6,7 @@ package nl.lumc.sasc.biopet.extensions.igvtools import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +/** General igvtools extension */ abstract class IGVTools extends BiopetCommandLineFunction { executable = config("exe", default = "igvtools", submodule = "igvtools", freeVar = false) override def versionCommand = executable + " version" diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala index 37589c5d2b32caf210cec84204bb81e1f2fbd030..fc9b404c09bde8db694fc0cdda1c676ecbfa601e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala @@ -61,6 +61,7 @@ class IGVToolsCount(val root: Configurable) extends IGVTools { throw new IllegalArgumentException("TDF file should have a .tdf file-extension")) } + /** Returns command to execute */ def 
cmdLine = { required(executable) + required("count") + @@ -81,10 +82,7 @@ class IGVToolsCount(val root: Configurable) extends IGVTools { required(genomeChromSizes) } - /** - * This part should never fail, these values are set within this wrapper - * - */ + /** This part should never fail, these values are set within this wrapper */ private def outputArg: String = { (tdf, wig) match { case (None, None) => throw new IllegalArgumentException("Either TDF or WIG should be supplied"); diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala index 5c52970ed8cc0c049ef236f571acedbb8a4610dd..8163d47aa4228b691d1300ec80d280a174b33052 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2.scala @@ -3,6 +3,8 @@ package nl.lumc.sasc.biopet.extensions.macs2 import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction /** + * General macs2 extension + * * Created by sajvanderzeeuw on 12/19/14.
*/ abstract class Macs2 extends BiopetCommandLineFunction { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala index 6c572f2292bf3813b254d36370f759cb3e225b84..fbb003a3a9265ee08321ccdeec9723c1a46db7da 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala @@ -5,6 +5,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } +/** Extension for macs2*/ class Macs2CallPeak(val root: Configurable) extends Macs2 { @Input(doc = "Treatment input", required = true) var treatment: File = _ @@ -56,6 +57,7 @@ class Macs2CallPeak(val root: Configurable) extends Macs2 { var broadcutoff: Option[Int] = config("broadcutoff") var callsummits: Boolean = config("callsummits", default = false) + /** Sets output files */ override def beforeGraph: Unit = { if (name.isEmpty) throw new IllegalArgumentException("Name is not defined") if (outputdir == null) throw new IllegalArgumentException("Outputdir is not defined") @@ -67,6 +69,7 @@ class Macs2CallPeak(val root: Configurable) extends Macs2 { output_gapped = new File(outputdir + name.get + ".gappedPeak") } + /** Returns command to execute */ def cmdLine = { required(executable) + required("callpeak") + required("--treatment", treatment) + /* Treatment sample */ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala index c980492349c480976e7240db0e3f83f884306b4b..232cbc24123983bc46f7c51f98284be10bb6f2a8 100644 --- 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard AddOrReplaceReadGroups */ class AddOrReplaceReadGroups(val root: Configurable) extends Picard { javaMainClass = "picard.sam.AddOrReplaceReadGroups" @@ -61,6 +62,7 @@ class AddOrReplaceReadGroups(val root: Configurable) extends Picard { @Argument(doc = "RGPI", required = false) var RGPI: Option[Int] = _ + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -77,6 +79,7 @@ class AddOrReplaceReadGroups(val root: Configurable) extends Picard { } object AddOrReplaceReadGroups { + /** Returns default AddOrReplaceReadGroups */ def apply(root: Configurable, input: File, output: File, sortOrder: String = null): AddOrReplaceReadGroups = { val addOrReplaceReadGroups = new AddOrReplaceReadGroups(root) addOrReplaceReadGroups.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala index 4aa970f636ddbbb7216abce5d96ebbbebc8a7598..974fd43c5ad00fd0c8fe15955c7c9379417d2595 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension 
for picard CalculateHsMetrics */ class CalculateHsMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.directed.CalculateHsMetrics" @@ -46,6 +47,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard { @Argument(doc = "BAIT_SET_NAME", required = false) var baitSetName: String = _ + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -58,6 +60,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard { } object CalculateHsMetrics { + /** Returns default CalculateHsMetrics */ def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File, outputDir: File): CalculateHsMetrics = { val calculateHsMetrics = new CalculateHsMetrics(root) calculateHsMetrics.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index 53b109b8c4fc549924b8f494993f661ec09598ad..692a260f4c3d238902e2cac34f7885a9dcae6c9f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -20,6 +20,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard CollectAlignmentSummaryMetrics */ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with Summarizable { javaMainClass = "picard.analysis.CollectAlignmentSummaryMetrics" @@ -50,6 +51,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with @Argument(doc 
= "STOP_AFTER", required = false) var stopAfter: Option[Long] = config("stopAfter") + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -63,6 +65,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with def summaryFiles: Map[String, File] = Map() + /** Returns stats for summary */ def summaryStats: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) @@ -79,6 +82,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with } object CollectAlignmentSummaryMetrics { + /** Returns default CollectAlignmentSummaryMetrics */ def apply(root: Configurable, input: File, outputDir: File): CollectAlignmentSummaryMetrics = { val collectAlignmentSummaryMetrics = new CollectAlignmentSummaryMetrics(root) collectAlignmentSummaryMetrics.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 86198d2a344489a62b546f5d9c0dd4a8234c929a..f93dc4aa77241dabfee7c0703536fda9cb6afa2c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard CollectGcBiasMetrics */ class CollectGcBiasMetrics(val root: Configurable) extends Picard { javaMainClass = "picard.analysis.CollectGcBiasMetrics" @@ -51,9 +52,9 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard { override def beforeGraph { if (outputChart == null) outputChart = new 
File(output + ".pdf") - //require(reference.exists) } + /** Returns command to execute */ override def commandLine = super.commandLine + repeat("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -67,6 +68,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard { } object CollectGcBiasMetrics { + /** Returns default CollectGcBiasMetrics */ def apply(root: Configurable, input: File, outputDir: File): CollectGcBiasMetrics = { val collectGcBiasMetrics = new CollectGcBiasMetrics(root) collectGcBiasMetrics.input :+= input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index 867e6f6802493a9dfdcf767ef44bde8d8b61abfb..b4dcba1a5aa3a44b2046643341d3124d5bc2e3cc 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -22,6 +22,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import scala.collection.immutable.Nil +/** Extension for picard CollectInsertSizeMetrics */ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summarizable { javaMainClass = "picard.analysis.CollectInsertSizeMetrics" @@ -57,10 +58,9 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa override def beforeGraph { outputHistogram = new File(output + ".pdf") - //if (outputHistogram == null) outputHistogram = new File(output + ".pdf") - //require(reference.exists) } + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -72,8 +72,10 @@ class CollectInsertSizeMetrics(val 
root: Configurable) extends Picard with Summa optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + /** Returns files for summary */ def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) + /** Returns stats for summary */ def summaryStats: Map[String, Any] = { val (header, content) = Picard.getMetrics(output) (for (i <- 0 to header.size if i < content.head.size) @@ -82,6 +84,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa } object CollectInsertSizeMetrics { + /** Returns default CollectInsertSizeMetrics */ def apply(root: Configurable, input: File, outputDir: File): CollectInsertSizeMetrics = { val collectInsertSizeMetrics = new CollectInsertSizeMetrics(root) collectInsertSizeMetrics.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala index fd0f28e73e1149d57d000a9082132c909c00f522..1511fa93f46870bb8e89505c50bc2ecd86fa0c22 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala @@ -20,6 +20,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.summary.Summarizable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard MarkDuplicates */ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { javaMainClass = "picard.sam.MarkDuplicates" @@ -76,6 +77,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { if (createIndex) outputIndex = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") } + /** Returns command to execute */ override def commandLine = super.commandLine + 
repeat("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -93,8 +95,10 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated = false) + optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated = false) + /** Returns files for summary */ def summaryFiles: Map[String, File] = Map() + /** Returns stats for summary */ def summaryStats: Map[String, Any] = { val (header, content) = Picard.getMetrics(outputMetrics) @@ -110,19 +114,12 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { } } object MarkDuplicates { - def apply(root: Configurable, input: List[File], outputDir: String): MarkDuplicates = { - val markDuplicates = new MarkDuplicates(root) - markDuplicates.input = input - markDuplicates.output = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.bam") - markDuplicates.outputMetrics = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.metrics") - return markDuplicates - } - + /** Returns default MarkDuplicates */ def apply(root: Configurable, input: List[File], output: File): MarkDuplicates = { val markDuplicates = new MarkDuplicates(root) markDuplicates.input = input markDuplicates.output = output markDuplicates.outputMetrics = new File(output.getParent, output.getName.stripSuffix(".bam") + ".metrics") - return markDuplicates + markDuplicates } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala index 8feaf782ac57aba5a6779e515fe2c6ce452f5871..ef3f358ab1598680a5f7b1fcb690232dd020bbe6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard MergeSamFiles */ class MergeSamFiles(val root: Configurable) extends Picard { javaMainClass = "picard.sam.MergeSamFiles" @@ -43,6 +44,7 @@ class MergeSamFiles(val root: Configurable) extends Picard { @Argument(doc = "COMMENT", required = false) var comment: Option[String] = config("comment") + /** Returns command to execute */ override def commandLine = super.commandLine + repeat("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -53,12 +55,13 @@ class MergeSamFiles(val root: Configurable) extends Picard { } object MergeSamFiles { + /** Returns default MergeSamFiles */ def apply(root: Configurable, input: List[File], outputDir: File, sortOrder: String = null): MergeSamFiles = { val mergeSamFiles = new MergeSamFiles(root) mergeSamFiles.input = input mergeSamFiles.output = new File(outputDir, input.head.getName.stripSuffix(".bam").stripSuffix(".sam") + ".merge.bam") if (sortOrder == null) mergeSamFiles.sortOrder = "coordinate" else mergeSamFiles.sortOrder = sortOrder - return mergeSamFiles + mergeSamFiles } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala index 12d9f05f6998fcac6462c3b65a2695a8471a73eb..035194d4836da5f9da66dc3e7328bfc7a1303131 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala @@ -22,9 +22,16 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument } import scala.io.Source +/** + * General 
picard extension + * + * This is based on using class files directly from the jar, if needed other picard jar can be used + */ abstract class Picard extends BiopetJavaCommandLineFunction { override def subPath = "picard" :: super.subPath + if (config.contains("picard_jar")) jarFile = config("picard_jar") + @Argument(doc = "VERBOSITY", required = false) var verbosity: Option[String] = config("verbosity") @@ -46,10 +53,12 @@ abstract class Picard extends BiopetJavaCommandLineFunction { @Argument(doc = "CREATE_MD5_FILE", required = false) var createMd5: Boolean = config("createmd5", default = false) - //FIXME: picard version - // override def versionCommand = executable + " " + javaOpts + " " + javaExecutable + " -h" - // override val versionRegex = """Version: (.*)""".r - // override val versionExitcode = List(0, 1) + override def versionCommand = { + if (jarFile != null) executable + " -cp " + jarFile + " " + javaMainClass + " -h" + else null + } + override val versionRegex = """Version: (.*)""".r + override val versionExitcode = List(0, 1) override val defaultVmem = "8G" memoryLimit = Option(3.0) @@ -66,6 +75,11 @@ abstract class Picard extends BiopetJavaCommandLineFunction { } object Picard { + /** + * This function parses a metrics file in separated values + * @param file input metrics file + * @return (header, content) + */ def getMetrics(file: File) = { val lines = Source.fromFile(file).getLines().toArray diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala index c3a4226d01cbbb0680a22e3e0c6ccaeb68b7398c..96bfdf4223851b83d137a066cfbf3df8d222659e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala @@ -19,6 +19,7 @@ import java.io.File import
nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard SamToFastq */ class SamToFastq(val root: Configurable) extends Picard { javaMainClass = "picard.sam.SamToFastq" @@ -70,6 +71,7 @@ class SamToFastq(val root: Configurable) extends Picard { @Argument(doc = "includeNonPrimaryAlignments", required = false) var includeNonPrimaryAlignments: Boolean = config("includeNonPrimaryAlignments", default = false) + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("FASTQ=", fastqR1, spaceSeparated = false) + @@ -90,11 +92,12 @@ class SamToFastq(val root: Configurable) extends Picard { } object SamToFastq { + /** Returns default SamToFastq */ def apply(root: Configurable, input: File, fastqR1: File, fastqR2: File = null): SamToFastq = { val samToFastq = new SamToFastq(root) samToFastq.input = input samToFastq.fastqR1 = fastqR1 samToFastq.fastqR2 = fastqR2 - return samToFastq + samToFastq } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala index ef7ec250d5de2bd88c36e48af157ae29bce0a3b0..8d2946291928f4019c52e1cdc7b6bf52c00df89a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } +/** Extension for picard SortSam */ class SortSam(val root: Configurable) extends Picard { javaMainClass = "picard.sam.SortSam" @@ -39,6 +40,7 @@ class SortSam(val root: Configurable) extends Picard { if (createIndex) outputIndex = new 
File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") } + /** Returns command to execute */ override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + @@ -46,6 +48,7 @@ class SortSam(val root: Configurable) extends Picard { } object SortSam { + /** Returns default SortSam */ def apply(root: Configurable, input: File, output: File, sortOrder: String = null): SortSam = { val sortSam = new SortSam(root) sortSam.input = input diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala index a6fe688c2486f46701d6397c8fcffacee4b1c29c..a2804668e5b77e71d8d2793aaaf80bdd7e68c646 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/Sambamba.scala @@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.sambamba import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +/** General Sambamba extension */ abstract class Sambamba extends BiopetCommandLineFunction { override val defaultVmem = "4G" override val defaultThreads = 2 diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala index 5be51decae455e3ff1a06c2379ee12b836a1c745..f324001077ac6c9d8b544783aa01c7f8f2dad190 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** 
Extension for sambamba flagstat */ class SambambaFlagstat(val root: Configurable) extends Sambamba { override val defaultThreads = 2 @@ -28,6 +29,7 @@ class SambambaFlagstat(val root: Configurable) extends Sambamba { @Output(doc = "output File") var output: File = _ + /** Returns command to execute */ def cmdLine = required(executable) + required("flagstat") + optional("-t", nCoresRequest) + @@ -35,24 +37,3 @@ class SambambaFlagstat(val root: Configurable) extends Sambamba { " > " + required(output) } - -object SambambaFlagstat { - def apply(root: Configurable, input: File, output: File): SambambaFlagstat = { - val flagstat = new SambambaFlagstat(root) - flagstat.input = input - flagstat.output = output - return flagstat - } - - def apply(root: Configurable, input: File, outputDir: String): SambambaFlagstat = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.getName)) - return apply(root, input, outputFile) - } - - def apply(root: Configurable, input: File): SambambaFlagstat = { - return apply(root, input, new File(swapExtension(input.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".flagstat" -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala index 4d988ef13c93074ff2ecc842be6aed69be07676a..924a420b44eb878d8a53d888eabe6a1daa2a61d5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for sambamba index */ class
SambambaIndex(val root: Configurable) extends Sambamba { override val defaultThreads = 2 @@ -28,30 +29,10 @@ class SambambaIndex(val root: Configurable) extends Sambamba { @Output(doc = "Output .bai file to") var output: File = _ + /** Returns command to execute */ def cmdLine = required(executable) + required("index") + optional("-t", nCoresRequest) + required(input) + required(output) } - -object SambambaIndex { - def apply(root: Configurable, input: File, output: File): SambambaIndex = { - val flagstat = new SambambaIndex(root) - flagstat.input = input - flagstat.output = output - return flagstat - } - - def apply(root: Configurable, input: File, outputDir: String): SambambaIndex = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.getName)) - return apply(root, input, outputFile) - } - - def apply(root: Configurable, input: File): SambambaIndex = { - return apply(root, input, new File(swapExtension(input.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".bam.bai" -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala index a45bc73f43fa56a1968487e18e09e0c4d38f129d..bb4ec3ef2f064abad45108be8a33e47fffb9963e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for sambamba markdup */ class SambambaMarkdup(val root: Configurable) extends Sambamba { override val defaultThreads = 4 @@ -36,6 +37,7 @@ class
SambambaMarkdup(val root: Configurable) extends Sambamba { val overflow_list_size: Option[Int] = config("overflow-list-size", default = 200000) val io_buffer_size: Option[Int] = config("io-buffer-size", default = 128) + /** Returns command to execute */ def cmdLine = required(executable) + required("markdup") + conditional(remove_duplicates, "--remove-duplicates") + @@ -47,24 +49,3 @@ class SambambaMarkdup(val root: Configurable) extends Sambamba { required(input) + required(output) } - -object SambambaMarkdup { - def apply(root: Configurable, input: File, output: File): SambambaMarkdup = { - val flagstat = new SambambaMarkdup(root) - flagstat.input = input - flagstat.output = output - return flagstat - } - - def apply(root: Configurable, input: File, outputDir: String): SambambaMarkdup = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.getName)) - return apply(root, input, outputFile) - } - - def apply(root: Configurable, input: File): SambambaMarkdup = { - return apply(root, input, new File(swapExtension(input.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".bam.bai" -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala index 45af3a8c387fdf5bcac8f10e2b5c9a98c912f062..8b514317a1980106c2d9b63e09d98e929c84904e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for sambamba merge */ class SambambaMerge(val root:
Configurable) extends Sambamba { override val defaultThreads = 4 @@ -31,6 +32,7 @@ class SambambaMerge(val root: Configurable) extends Sambamba { // @doc: compression_level 6 is average, 0 = no compression, 9 = best val compression_level: Option[Int] = config("compression_level", default = 6) + /** Returns command to execute */ def cmdLine = required(executable) + required("merge") + optional("-t", nCoresRequest) + @@ -38,36 +40,3 @@ class SambambaMerge(val root: Configurable) extends Sambamba { required(output) + repeat("", input) } - -object SambambaMerge { - def apply(root: Configurable, input: List[File], output: File): SambambaMerge = { - val flagstat = new SambambaMerge(root) - flagstat.input = input - flagstat.output = output - return flagstat - } - - def apply(root: Configurable, input: List[File], outputDir: String): SambambaMerge = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.head.getName)) - return apply(root, input, outputFile) - } - - def apply(root: Configurable, input: List[File]): SambambaMerge = { - return apply(root, input, new File(swapExtension(input.head.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".merge.bam" -} - -// -//object MergeSamFiles { -// def apply(root: Configurable, input: List[File], outputDir: String, sortOrder: String = null): MergeSamFiles = { -// val mergeSamFiles = new MergeSamFiles(root) -// mergeSamFiles.input = input -// mergeSamFiles.output = new File(outputDir, input.head.getName.stripSuffix(".bam").stripSuffix(".sam") + ".merge.bam") -// if (sortOrder == null) mergeSamFiles.sortOrder = "coordinate" -// else mergeSamFiles.sortOrder = sortOrder -// return mergeSamFiles -// } -//} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala index 740da10524d33f6e5e370bdc3320defa50f7ee79..d220be089572d7988f6c12c315620d35b211beea 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala @@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.samtools import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +/** General class for samtools extensions */ abstract class Samtools extends BiopetCommandLineFunction { override def subPath = "samtools" :: super.subPath executable = config("exe", default = "samtools") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala index a4833d5f17c509fc0a47ca77a631e2ec32596bd1..67fd2f333d55ae5a30715bbb15a3132999c16aac 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFlagstat.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for samtools flagstat */ class SamtoolsFlagstat(val root: Configurable) extends Samtools { @Input(doc = "Bam File") var input: File = _ @@ -26,6 +27,7 @@ class SamtoolsFlagstat(val root: Configurable) extends Samtools { @Output(doc = "output File") var output: File = _ + /** Returns command to execute */ def cmdLine = required(executable) + required("flagstat") + required(input) + " > " + required(output) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala index 2de91382d2a5231f4c112ed1915dd34559a11cc5..a90c569037ce707ccc420664255dd13a6cbd9e5a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for samtools mpileup */ class SamtoolsMpileup(val root: Configurable) extends Samtools { @Input(doc = "Bam File") var input: List[File] = Nil @@ -47,6 +48,8 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools { conditional(u, "-u") def cmdPipeInput = cmdBase + "-" def cmdPipe = cmdBase + repeat(input) + + /** Returns command to execute */ def cmdLine = cmdPipe + " > " + required(output) } @@ -69,4 +72,4 @@ object SamtoolsMpileup { } private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".mpileup" -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala index 883c21ac1ff2bb0940d2e61fdb13486829454dee..5dcc66ee16e4f885227db0d97d435ec972300fe2 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala @@ -19,6 +19,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File +/** Extension for samtools view */ class SamtoolsView(val root: Configurable) extends Samtools { @Input(doc = "Bam File") var input: File = _ @@ -37,6 +38,8 @@ class 
SamtoolsView(val root: Configurable) extends Samtools { conditional(h, "-h") def cmdPipeInput = cmdBase + "-" def cmdPipe = cmdBase + required(input) + + /** Returns command to execute */ def cmdLine = cmdPipe + " > " + required(output) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala index a1131371ade82303aca47155593568f2cadbeb2d..c12ca6c5f5f8c822fe0206c189485a2e7957bfc6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/Seqtk.scala @@ -17,9 +17,7 @@ package nl.lumc.sasc.biopet.extensions.seqtk import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction -/** - * Abstract class for all seqtk wrappers. - */ +/** Abstract class for all seqtk wrappers. */ abstract class Seqtk extends BiopetCommandLineFunction { override def subPath = "seqtk" :: super.subPath executable = config("exe", default = "seqtk", freeVar = true) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala index b3233ecfecbf070a36cf5db6aaf73f0a8cf4aa07..a75a1424610ba1a537ea9c5011785b248d9f8839 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/MultisamplePipelineTemplate.scala @@ -19,46 +19,63 @@ import nl.lumc.sasc.biopet.core.{ PipelineCommand, MultiSampleQScript, BiopetQSc import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.queue.QScript +/** Template for a multisample pipeline */ class MultisamplePipelineTemplate(val root: Configurable) extends QScript with MultiSampleQScript { def this() = 
this(null) + /** Location of summary file */ def summaryFile: File = new File(outputDir, "MultisamplePipelineTemplate.summary.json") + /** File to add to the summary */ def summaryFiles: Map[String, File] = Map() + /** Pipeline settings to add to the summary */ def summarySettings: Map[String, Any] = Map() + /** Function to make a sample */ def makeSample(id: String) = new Sample(id) + /** This class will contain jobs and libraries for a sample */ class Sample(sampleId: String) extends AbstractSample(sampleId) { + /** Sample specific files for summary */ def summaryFiles: Map[String, File] = Map() + /** Sample specific stats for summary */ def summaryStats: Map[String, Any] = Map() + /** Function to make a library */ def makeLibrary(id: String) = new Library(id) + + /** This class will contain all jobs for a library */ class Library(libId: String) extends AbstractLibrary(libId) { + /** Library specific files for summary */ def summaryFiles: Map[String, File] = Map() + /** Library specific stats for summary */ def summaryStats: Map[String, Any] = Map() + /** Method to add library jobs */ protected def addJobs(): Unit = { - // Library jobs } } + /** Method to add sample jobs */ protected def addJobs(): Unit = { - // Sample jobs } } + /** Method where multisample jobs are added */ def addMultiSampleJobs(): Unit = { } + /** This is executed before the script starts */ def init(): Unit = { } + /** Method where jobs must be added */ def biopetScript() { } } +/** Object to let to generate a main method */ object MultisamplePipelineTemplate extends PipelineCommand \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/PipelineTemplate.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/PipelineTemplate.scala index 6278ab6f87cdf9c23ed3b8431642117d82ad8fc9..d4998770e07bfd19a26746e01a49adb29e7cb962 100644 --- 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/PipelineTemplate.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/PipelineTemplate.scala @@ -20,14 +20,18 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.function._ +/** Template for a pipeline */ class PipelineTemplate(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) + /** This is executed before the script starts */ def init() { } + /** Method where jobs must be added */ def biopetScript() { } } +/** Object to let to generate a main method */ object PipelineTemplate extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala index 4e1d50fbcd96b45aa067ce845a2dc501eff2e4a7..446bdb29c433fd8c45313caf0bb770c494deecac 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala @@ -164,14 +164,11 @@ object Seqstat extends ToolCommand { case class ReadStat(qual: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer(), nuc: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill('T'.toInt + 1)(0), - var withN: Long, + var withN: Long = 0L, lengths: mutable.ArrayBuffer[Int] = mutable.ArrayBuffer()) val baseStats: mutable.ArrayBuffer[BaseStat] = mutable.ArrayBuffer() - val readStats: ReadStat = new ReadStat(mutable.ArrayBuffer(), - mutable.ArrayBuffer.fill('T'.toInt + 1)(0), - 0L, - mutable.ArrayBuffer()) + val readStats: ReadStat = new ReadStat() /** * Compute the quality metric per read @@ -194,6 +191,9 @@ object Seqstat extends ToolCommand { val readQual = record.getBaseQualityString val readNucleotides = record.getReadString + if (record.length >= readStats.lengths.size) // Extends array when length not 
yet possible + (0 to (record.length - readStats.lengths.size)).foreach(_ => readStats.lengths.append(0)) + readStats.lengths(record.length) += 1 for (t <- 0 until record.length()) { @@ -211,10 +211,7 @@ object Seqstat extends ToolCommand { readStats.qual ++= mutable.ArrayBuffer.fill(avgQual - readStats.qual.length + 1)(0) } readStats.qual(avgQual) += 1 - readStats.withN += { - if (readNucleotides.contains("N")) 1L - else 0L - } + if (readNucleotides.contains("N")) readStats.withN += 1L } /** @@ -278,7 +275,7 @@ object Seqstat extends ToolCommand { } for (pos <- 0 until readStats.qual.length) { - var key: Int = pos - phredEncoding.id + val key: Int = pos - phredEncoding.id if (key > 0) { // count till the max of baseHistogram.length for (histokey <- 0 until key + 1) { @@ -319,7 +316,7 @@ object Seqstat extends ToolCommand { ), ("stats", Map( ("bases", Map( - ("num_n", nucleotideHistoMap('N')), + ("num_n", nucleotideHistoMap.getOrElse('N', 0)), ("num_total", nucleotideHistoMap.values.sum), ("num_qual_gte", baseQualHistoMap.toMap), ("nucleotides", nucleotideHistoMap.toMap) @@ -335,9 +332,6 @@ object Seqstat extends ToolCommand { )) ) - val jsonReport: Json = { - ConfigUtils.mapToJson(report) - } - println(jsonReport.spaces2) + println(ConfigUtils.mapToJson(report).spaces2) } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 9d6e983a4f90120f5f2df6d31a489e29c560bb5a..7f704c12aae8d78a73e42ca81323f69a940bda0d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -32,6 +32,13 @@ object VcfUtils { alleles(longestAlleleId) } + /** + * Method will extend a allele till a new length + * @param bases Allele + * @param newSize New size of allele + * @param fillWith Char to fill gap + * @return + */ def fillAllele(bases: 
String, newSize: Int, fillWith: Char = '-'): String = { bases + (Array.fill[Char](newSize - bases.size)(fillWith)).mkString } diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 876da87289b56d183d070293468dcff9bc493931..1d22bb55b9ea174c2c640b1585dc01015116bb81 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -20,7 +20,7 @@ import java.io.File import java.util.Date import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.{ Ln, Star, Stampy, Bowtie } +import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem } import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.tools.FastqSplitter @@ -163,9 +163,14 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S chunks += (chunkDir -> (removeGz(chunkDir + input_R1.getName), if (paired) removeGz(chunkDir + input_R2.get.getName) else "")) } - else chunks += (outputDir -> ( - flexiprep.extractIfNeeded(input_R1, flexiprep.outputDir), - if (paired) flexiprep.extractIfNeeded(input_R2.get, flexiprep.outputDir) else "") + else if (skipFlexiprep) { + chunks += (outputDir -> ( + extractIfNeeded(input_R1, flexiprep.outputDir), + if (paired) extractIfNeeded(input_R2.get, outputDir) else "") + ) + } else chunks += (outputDir -> ( + flexiprep.outputFiles("fastq_input_R1"), + if (paired) flexiprep.outputFiles("fastq_input_R2") else "") ) if (chunking) { @@ -441,6 +446,32 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S return RG.substring(0, RG.lastIndexOf("\\t")) } + + //FIXME: This is code duplication 
from flexiprep, need general class to pass jobs inside a util function + /** + * Extracts file if file is compressed + * @param file + * @param runDir + * @return returns extracted file + */ + def extractIfNeeded(file: File, runDir: File): File = { + if (file == null) return file + else if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) { + var newFile: File = swapExt(runDir, file, ".gz", "") + if (file.getName().endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "") + val zcatCommand = Zcat(this, file, newFile) + zcatCommand.isIntermediate = true + add(zcatCommand) + return newFile + } else if (file.getName().endsWith(".bz2")) { + val newFile = swapExt(runDir, file, ".bz2", "") + val pbzip2 = Pbzip2(this, file, newFile) + pbzip2.isIntermediate = true + add(pbzip2) + return newFile + } else return file + } + } object Mapping extends PipelineCommand \ No newline at end of file diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala index 5357811e964b4955115dbded66a6a3d3baaa0878..c95fe06035f42fd6e7b5bd64df2b76a6bef99542 100644 --- a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala +++ b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala @@ -37,18 +37,23 @@ class MappingTest extends TestNGSuite with Matchers { val chunks = Array(1, 5, 10, 100) val skipMarkDuplicates = Array(true, false) val skipFlexipreps = Array(true, false) + val zipped = Array(true, false) for ( aligner <- aligners; pair <- paired; chunk <- chunks; skipMarkDuplicate <- skipMarkDuplicates; - skipFlexiprep <- skipFlexipreps - ) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep) + skipFlexiprep <- skipFlexipreps; + zipped <- zipped + ) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep, zipped) } @Test(dataProvider = "mappingOptions") - def 
testMapping(aligner: String, paired: Boolean, chunks: Int, skipMarkDuplicate: Boolean, skipFlexiprep: Boolean) = { + def testMapping(aligner: String, paired: Boolean, chunks: Int, + skipMarkDuplicate: Boolean, + skipFlexiprep: Boolean, + zipped: Boolean) = { val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir, "aligner" -> aligner, "number_chunks" -> chunks, @@ -57,15 +62,20 @@ class MappingTest extends TestNGSuite with Matchers { ), Map(MappingTest.executables.toSeq: _*)) val mapping: Mapping = initPipeline(map) - mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") - if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + if (zipped) { + mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq.gz") + if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq.gz")) + } else { + mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") + if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + } mapping.sampleId = Some("1") mapping.libId = Some("1") mapping.script() //Flexiprep mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2) - mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe 0 + mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe (if (!zipped || (chunks > 1 && skipFlexiprep)) 0 else if (paired) 2 else 1) mapping.functions.count(_.isInstanceOf[Seqstat]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 4 else 2) * chunks) mapping.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks) mapping.functions.count(_.isInstanceOf[Cutadapt]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks)