Commit 1be6069f authored by Peter van 't Hof

Inspection round Biopet Extensions

parent cbb4d0e2
......@@ -33,12 +33,6 @@
<artifactId>BiopetCore</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
......@@ -40,7 +40,7 @@ class Awk(val parent: Configurable) extends BiopetCommandLineFunction with Versi
var command: String = _
def cmdLine =
def cmdLine: String =
executable +
required(command) +
(if (inputAsStdin) "" else required(input)) +
......
......@@ -35,7 +35,7 @@ class Cat(val parent: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "cat")
/** return commandline to execute */
def cmdLine =
def cmdLine: String = {
  // Pieces are built in the same left-to-right order in which they are concatenated.
  val exe = required(executable)
  // Inputs are left off the command line when they arrive on stdin.
  val sources = if (inputAsStdin) "" else repeat(input)
  // No redirect when writing to stdout; otherwise append (>>) or overwrite (>).
  val sink =
    if (outputAsStdout) ""
    else {
      val redirect = if (appending) " >> " else " > "
      redirect + required(output)
    }
  exe + sources + sink
}
......
......@@ -19,7 +19,7 @@ class Cp(val parent: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "cp")
/** Returns command to execute */
def cmdLine =
def cmdLine: String = {
  // Executable, then source, then target; each part is formatted by required(...).
  val parts = Seq(required(executable), required(source), required(target))
  parts.mkString
}
......
......@@ -16,10 +16,12 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
* Wrapper for the cufflinks command line tool.
* Written based on cufflinks version v2.2.1 (md5: 07c831c4f8b4e161882731ea5694ff80)
......@@ -37,7 +39,7 @@ class Cufflinks(val parent: Configurable) extends BiopetCommandLineFunction with
/** input file */
@Input(doc = "Input file (SAM or BAM)", required = true)
var input: File = null
var input: File = _
/** output files, computed automatically from output directory */
@Output(doc = "Output GTF file")
......@@ -187,11 +189,11 @@ class Cufflinks(val parent: Configurable) extends BiopetCommandLineFunction with
/** do not contact server to check for update availability [FALSE] */
var noUpdateCheck: Boolean = config("no_update_check", default = false)
def versionRegex = """cufflinks v(.*)""".r
def versionCommand = executable
def versionRegex: Regex = """cufflinks v(.*)""".r
def versionCommand: String = executable
override def versionExitcode = List(0, 1)
def cmdLine =
def cmdLine: String =
required(executable) +
required("--output-dir", outputDir) +
optional("--num-threads", threads) +
......
......@@ -16,10 +16,12 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
import scala.util.matching.Regex
/**
* Created by pjvan_thof on 8/11/15.
*/
......@@ -30,8 +32,8 @@ class Curl(val parent: Configurable) extends BiopetCommandLineFunction with Vers
var url: String = _
executable = config("exe", default = "curl")
def versionCommand = executable + " --version"
def versionRegex = """curl (\w+\.\w+\.\w+) .*""".r
def versionCommand: String = executable + " --version"
def versionRegex: Regex = """curl (\w+\.\w+\.\w+) .*""".r
def cmdLine: String = {
  // Executable followed by the url; stdout is redirected to `output` unless it is piped on.
  val exe = required(executable)
  val target = required(url)
  val sink = if (outputAsStdout) "" else " > " + required(output)
  exe + target + sink
}
......
......@@ -44,8 +44,8 @@ class Cutadapt(val parent: Configurable)
var statsOutput: File = _
executable = config("exe", default = "cutadapt")
def versionCommand = executable + " --version"
def versionRegex = """(.*)""".r
def versionCommand: String = executable + " --version"
def versionRegex: Regex = """(.*)""".r
/** Name of the key containing clipped adapters information in the summary stats. */
def adaptersStatsName: String = "adapters"
......@@ -112,7 +112,7 @@ class Cutadapt(val parent: Configurable)
var untrimmedPairedOutput: Option[File] = config("untrimmed_paired_output")
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
// Options that influence how the adapters are found
repeat("-a", adapter) +
......@@ -187,10 +187,9 @@ class Cutadapt(val parent: Configurable)
// identify the adapter name and count
for (line <- adapter.split("\n")) {
line match {
case adapterR(adapter, count) => {
case adapterR(adapter, count) =>
adapterName = adapter
adapterCount = count.toInt
}
case _ =>
}
}
......
......@@ -21,6 +21,8 @@ import nl.lumc.sasc.biopet.utils.SemanticVersion
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
* Extension for fastqc
* Based on version 0.10.1 and 0.11.2
......@@ -34,10 +36,10 @@ class Fastqc(val parent: Configurable) extends BiopetCommandLineFunction with Ve
var adapters: Option[File] = None
@Input(doc = "Fastq file", shortName = "FQ")
var fastqfile: File = null
var fastqfile: File = _
@Output(doc = "Output", shortName = "out", required = true)
var output: File = null
var output: File = _
executable = config("exe", default = "fastqc")
var javaExe: String = config("exe", default = "java", namespace = "java", freeVar = false)
......@@ -47,8 +49,8 @@ class Fastqc(val parent: Configurable) extends BiopetCommandLineFunction with Ve
var nogroup: Boolean = config("nogroup", default = false)
var extract: Boolean = config("extract", default = true)
def versionRegex = """FastQC (.*)""".r
def versionCommand = executable + " --version"
def versionRegex: Regex = """FastQC (.*)""".r
def versionCommand: String = executable + " --version"
override def defaultThreads = 4
/** Sets contaminants and adapters when not yet set */
......@@ -86,7 +88,7 @@ class Fastqc(val parent: Configurable) extends BiopetCommandLineFunction with Ve
}
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
optional("--java", javaExe) +
optional("--threads", threads) +
......
......@@ -92,7 +92,7 @@ class Flash(val parent: Configurable) extends BiopetCommandLineFunction with Ver
_outputHistogram = outputHistogram
}
def cmdLine =
def cmdLine: String =
executable +
optional("-m", minOverlap) +
optional("-M", maxOverlap) +
......
......@@ -17,9 +17,11 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction, Reference}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
* Extension for freebayes
*
......@@ -37,7 +39,7 @@ class Freebayes(val parent: Configurable)
var reference: File = _
@Output(required = true)
var outputVcf: File = null
var outputVcf: File = _
@Input(required = false)
var bamList: Option[File] = config("bam_list")
......@@ -129,15 +131,15 @@ class Freebayes(val parent: Configurable)
var haplotypeLength: Option[Int] = config("haplotype_length")
executable = config("exe", default = "freebayes")
def versionRegex = """version: (.*)""".r
def versionCommand = executable + " --version"
def versionRegex: Regex = """version: (.*)""".r
def versionCommand: String = executable + " --version"
/** Calls the parent beforeGraph hook first, then assigns `reference` from referenceFasta(). */
override def beforeGraph(): Unit = {
super.beforeGraph()
reference = referenceFasta()
}
def cmdLine =
def cmdLine: String =
executable +
required("--fasta-reference", reference) +
repeat("--bam", bamfiles) +
......
......@@ -35,7 +35,7 @@ class GffRead(val parent: Configurable) extends BiopetCommandLineFunction {
var T: Boolean = config("T", default = false, freeVar = false)
def cmdLine =
def cmdLine: String =
executable +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStdout) "" else required("-o", output)) +
......
......@@ -32,14 +32,14 @@ class Grep(val parent: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "grep")
var grepFor: String = null
var grepFor: String = _
var invertMatch: Boolean = false
var regex: Boolean = false
var perlRegexp: Boolean = false
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
conditional(invertMatch, "-v") +
conditional(regex, "-e") +
......
......@@ -41,7 +41,7 @@ class GtfToGenePred(val parent: Configurable) extends BiopetCommandLineFunction
var simple: Boolean = config("simple", default = false)
// Config key corrected from "gene _name_as_name2": the stray space presumably kept a
// normally spelled snake_case config entry from ever being picked up.
// NOTE(review): confirm no existing config relies on the old spaced key.
var geneNameAsName2: Boolean = config("gene_name_as_name2", default = false)
def cmdLine =
def cmdLine: String =
executable +
conditional(genePredExt, "-genePredExt") +
conditional(allErrors, "-allErrors") +
......
......@@ -16,10 +16,12 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
class Gzip(val parent: Configurable) extends BiopetCommandLineFunction with Version {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
......@@ -29,10 +31,10 @@ class Gzip(val parent: Configurable) extends BiopetCommandLineFunction with Vers
executable = config("exe", default = "gzip", freeVar = false)
def versionRegex = """gzip (.*)""".r
def versionCommand = executable + " --version"
def versionRegex: Regex = """gzip (.*)""".r
def versionCommand: String = executable + " --version"
def cmdLine =
def cmdLine: String = {
  // `-c` is always passed; inputs and the output redirect are dropped when piped.
  val exe = required(executable)
  val sources = if (inputAsStdin) "" else repeat(input)
  val sink = if (outputAsStdout) "" else " > " + required(output)
  exe + " -c " + sources + sink
}
......
......@@ -16,10 +16,12 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
* Wrapper for the htseq-count command line tool
* Written based on htseq-count version 0.6.1p1
......@@ -31,15 +33,15 @@ class HtseqCount(val parent: Configurable) extends BiopetCommandLineFunction wit
/** input file */
@Input(doc = "Input alignment file", required = true)
var inputAlignment: File = null
var inputAlignment: File = _
/** input GFF / GTF annotation file */
@Input(doc = "Input GFF / GTF annotation file", required = true)
var inputAnnotation: File = null
var inputAnnotation: File = _
/** output file */
@Output(doc = "Output count file", required = true)
var output: File = null
var output: File = _
/** type of input alignment */
var format: Option[String] = config("format")
......@@ -69,10 +71,10 @@ class HtseqCount(val parent: Configurable) extends BiopetCommandLineFunction wit
/** suppress progress report */
var quiet: Boolean = config("quiet", default = false)
def versionRegex = """.*, version (.*)\.""".r
def versionCommand = executable + " --help"
def versionRegex: Regex = """.*, version (.*)\.""".r
def versionCommand: String = executable + " --help"
def cmdLine = {
def cmdLine: String = {
required(executable) +
optional("--format", format) +
optional("--order", order) +
......
......@@ -33,7 +33,7 @@ class Pbzip2(val parent: Configurable) extends BiopetCommandLineFunction {
var decomrpess = true
var memory: Option[Int] = config("memory")
override def defaultCoreMemory = memory.getOrElse(1000).toDouble / 1000
override def defaultCoreMemory: Double = memory.getOrElse(1000).toDouble / 1000
override def defaultThreads = 2
override def beforeCmd() {
......@@ -41,7 +41,7 @@ class Pbzip2(val parent: Configurable) extends BiopetCommandLineFunction {
}
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
conditional(decomrpess, "-d") +
conditional(!decomrpess, "-z") +
......
......@@ -21,6 +21,8 @@ import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline._
import scala.util.matching.Regex
/**
* Created by wyleung on 8-1-16.
*/
......@@ -43,8 +45,8 @@ class Pysvtools(val parent: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "vcf_merge_sv_events")
def versionRegex = """PySVtools (.*)""".r
def versionCommand = executable + " --version"
def versionRegex: Regex = """PySVtools (.*)""".r
def versionCommand: String = executable + " --version"
override def defaultThreads = 2
override def beforeGraph(): Unit = {
......@@ -61,7 +63,7 @@ class Pysvtools(val parent: Configurable) extends BiopetCommandLineFunction {
}
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
repeat("-c", exclusionRegions) +
optional("-f", flanking) +
......
......@@ -16,10 +16,11 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
import scala.util.matching.Regex
import scalaz.std.boolean.option
/**
......@@ -29,8 +30,8 @@ import scalaz.std.boolean.option
class Raxml(val parent: Configurable) extends BiopetCommandLineFunction with Version {
override def defaultThreads = 1
def versionCommand = executable + " -v"
def versionRegex = """.*version ([\w\.]*) .*""".r
def versionCommand: String = executable + " -v"
def versionRegex: Regex = """.*version ([\w\.]*) .*""".r
@Input(doc = "Input phy/fasta file", required = true)
var input: File = _
......@@ -54,7 +55,7 @@ class Raxml(val parent: Configurable) extends BiopetCommandLineFunction with Ver
var f: String = "d"
@Argument(doc = "Output directory", required = true)
var w: File = null
var w: File = _
// Initialised to None rather than `_`: the default initializer gives a reference type
// (including Option) the value null, which defeats Option-based handling of an unset value.
@Input(required = false)
var t: Option[File] = None
......@@ -91,19 +92,19 @@ class Raxml(val parent: Configurable) extends BiopetCommandLineFunction with Ver
}
/** Returns bestTree file */
def getBestTreeFile = option(f == "d" && b.isEmpty, new File(w, "RAxML_bestTree." + n))
def getBestTreeFile: Option[File] = option(f == "d" && b.isEmpty, new File(w, "RAxML_bestTree." + n))
/** Returns bootstrap file */
def getBootstrapFile = option(f == "d" && b.isDefined, new File(w, "RAxML_bootstrap." + n))
def getBootstrapFile: Option[File] = option(f == "d" && b.isDefined, new File(w, "RAxML_bootstrap." + n))
/** Returns bipartitions file */
def getBipartitionsFile = option(f == "b", new File(w, "RAxML_bipartitions." + n))
def getBipartitionsFile: Option[File] = option(f == "b", new File(w, "RAxML_bipartitions." + n))
/** Returns the info file: "RAxML_info." followed by `n`, resolved inside directory `w` */
def getInfoFile: File = new File(w, "RAxML_info." + n)
/** return commandline to execute */
def cmdLine =
def cmdLine: String =
required(executable) +
required("-m", m) +
required("-s", input) +
......
......@@ -34,7 +34,7 @@ class RunGubbins(val parent: Configurable) extends BiopetCommandLineFunction {
var fastafile: File = _
@Argument(required = true)
var outputDirectory: File = null
var outputDirectory: File = _
executable = config("exe", default = "run_gubbins.py")
var outgroup: Option[String] = config("outgroup")
......@@ -72,7 +72,7 @@ class RunGubbins(val parent: Configurable) extends BiopetCommandLineFunction {
}
/** Return command to execute */
def cmdLine =
def cmdLine: String =
required("cd", outputDirectory) + " && " + required(executable) +
optional("--outgroup", outgroup) +
optional("--starting_tree", startingTree) +
......
......@@ -42,7 +42,7 @@ class Sed(val parent: Configurable) extends BiopetCommandLineFunction with Versi
var expressions: List[String] = Nil
def cmdLine =
def cmdLine: String =
executable +
repeat("-e", expressions) +
(if (inputAsStdin) "" else required(inputFile)) +
......
......@@ -31,7 +31,7 @@ class Sha1sum(val parent: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "sha1sum")
/** Returns the command line: executable, then input, with stdout redirected (" > ") to output */
def cmdLine = required(executable) + required(input) + " > " + required(output)
def cmdLine: String = required(executable) + required(input) + " > " + required(output)
}
object Sha1sum {
......
......@@ -16,13 +16,14 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.collection.mutable
import scala.io.Source
import scala.util.matching.Regex
/**
* Extension for sickle
......@@ -58,8 +59,8 @@ class Sickle(val parent: Configurable)
var discardN: Boolean = config("discardN", default = false)
var quiet: Boolean = config("quiet", default = false)
var defaultQualityType: String = config("defaultqualitytype", default = "sanger")
def versionRegex = """sickle version (.*)""".r
def versionCommand = executable + " --version"
def versionRegex: Regex = """sickle version (.*)""".r
def versionCommand: String = executable + " --version"
/** Sets qualityType if it is still empty */
override def beforeGraph() {
......@@ -67,7 +68,7 @@ class Sickle(val parent: Configurable)
}
/** Return command to execute */
def cmdLine = {
def cmdLine: String = {
var cmd: String = required(executable)
if (inputR2 != null) {
cmd += required("pe") +
......@@ -87,7 +88,7 @@ class Sickle(val parent: Configurable)
(if (outputAsStdout) "" else " > " + required(outputStats))
}
override def summaryDeps = outputStats :: super.summaryDeps
override def summaryDeps: List[File] = outputStats :: super.summaryDeps
/** returns stats map for summary */
def summaryStats: Map[String, Any] = {
......@@ -109,12 +110,12 @@ class Sickle(val parent: Configurable)
case sKept(num) => stats += ("num_reads_kept" -> num.toInt)
case sDiscarded(num) => stats += ("num_reads_discarded_total" -> num.toInt)
// paired run
case pPairKept(reads, pairs) => stats += ("num_reads_kept" -> reads.toInt)
case pSingleKept(total, r1, r2) =>
case pPairKept(reads, _) => stats += ("num_reads_kept" -> reads.toInt)
case pSingleKept(_, r1, r2) =>
stats += ("num_reads_kept_R1" -> r1.toInt)
stats += ("num_reads_kept_R2" -> r2.toInt)
case pPairDiscarded(reads, pairs) => stats += ("num_reads_discarded_both" -> reads.toInt)
case pSingleDiscarded(total, r1, r2) =>
case pPairDiscarded(reads, _) => stats += ("num_reads_discarded_both" -> reads.toInt)
case pSingleDiscarded(_, r1, r2) =>
stats += ("num_reads_discarded_R1" -> r1.toInt)
stats += ("num_reads_discarded_R2" -> r2.toInt)
case _ =>
......
......@@ -17,9 +17,11 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction, Reference}
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/** Extension for stampy */
class Stampy(val parent: Configurable)
extends BiopetCommandLineFunction
......@@ -32,7 +34,7 @@ class Stampy(val parent: Configurable)
var R2: File = _
@Input(doc = "The reference file for the bam files.", shortName = "ref")
var reference: File = null
var reference: File = _
@Input(doc = "The genome prefix.")
var genome: File = config("genome")
......@@ -57,19 +59,19 @@ class Stampy(val parent: Configurable)
var sensitive: Boolean = config("sensitive", default = false)
var fast: Boolean = config("fast", default = false)
var readgroup: String = null
var readgroup: String = _
var verbosity: Option[Int] = config("verbosity", default = 2)
var logfile: Option[String] = config("logfile")
executable = config("exe", default = "stampy.py", freeVar = false)
def versionRegex = """stampy v(.*) \(.*\), .*""".r
def versionRegex: Regex = """stampy v(.*) \(.*\), .*""".r
override def versionExitcode = List(0, 1)
/** Stampy uses approx factor 1.1 times the size of the genome in memory. */
override def defaultCoreMemory: Double = 4.0
override def defaultThreads = 8
def versionCommand = executable + " --help"
def versionCommand: String = executable + " --help"
/** Sets readgroup when not set yet */