Commit 74091f0f authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into feature-bamstats

# Conflicts:
#	biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala
parents ed129189 edc81be3
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
#if (!sampleLevel) <th>Library</th> #end #if (!sampleLevel) <th>Library</th> #end
<th>Total</th> <th>Total</th>
<th>Mapped</th> <th>Mapped</th>
<th>Secondary</th>
<th>(%)</th> <th>(%)</th>
<th>Duplicates</th> <th>Duplicates</th>
<th>(%)</th> <th>(%)</th>
...@@ -85,12 +86,14 @@ ...@@ -85,12 +86,14 @@
val total = summary.getValue((prefixPath ::: List("biopet_flagstat", "All")):_*).getOrElse(0L).asInstanceOf[Long] val total = summary.getValue((prefixPath ::: List("biopet_flagstat", "All")):_*).getOrElse(0L).asInstanceOf[Long]
val mapped = summary.getValue((prefixPath ::: List("biopet_flagstat", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long] val mapped = summary.getValue((prefixPath ::: List("biopet_flagstat", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long]
val duplicates = summary.getValue((prefixPath ::: List("biopet_flagstat", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long] val duplicates = summary.getValue((prefixPath ::: List("biopet_flagstat", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long]
val secondary = summary.getValue((prefixPath ::: List("biopet_flagstat", "NotPrimaryAlignment")):_*).getOrElse(0L).asInstanceOf[Long]
}# }#
<td>${total}</td> <td>${total}</td>
<td>${mapped}</td> <td>${mapped}</td>
<td>${mapped.toDouble / total * 100}%</td> <td>${secondary}</td>
<td>${(mapped - secondary).toDouble / (total - secondary) * 100}%</td>
<td>${duplicates}</td> <td>${duplicates}</td>
<td>${duplicates.toDouble / total * 100}%</td> <td>${duplicates.toDouble / (total - secondary) * 100}%</td>
</tr> </tr>
#end #end
#end #end
......
...@@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics ...@@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat } import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag } import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect } import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
import nl.lumc.sasc.biopet.extensions.picard._ import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedCheck import nl.lumc.sasc.biopet.utils.intervals.BedCheck
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
...@@ -41,6 +41,8 @@ class BamMetrics(val root: Configurable) extends QScript ...@@ -41,6 +41,8 @@ class BamMetrics(val root: Configurable) extends QScript
@Input(doc = "Bam File", shortName = "BAM", required = true) @Input(doc = "Bam File", shortName = "BAM", required = true)
var inputBam: File = _ var inputBam: File = _
override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))
/** return location of summary file */ /** return location of summary file */
def summaryFile = (sampleId, libId) match { def summaryFile = (sampleId, libId) match {
case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json") case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
...@@ -164,7 +166,11 @@ class BamMetrics(val root: Configurable) extends QScript ...@@ -164,7 +166,11 @@ class BamMetrics(val root: Configurable) extends QScript
addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose") addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose))) add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
val bedCov = BedtoolsCoverage(this, intervals.bed, inputBam, depth = true) val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
val bedCov = BedtoolsCoverage(this, sorter.output, inputBam, depth = true)
val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage") val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
covStats.title = Some("Coverage Plot") covStats.title = Some("Coverage Plot")
covStats.subTitle = Some(s"for file '$targetName.bed'") covStats.subTitle = Some(s"for file '$targetName.bed'")
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
<dependency> <dependency>
<groupId>org.broadinstitute.gatk</groupId> <groupId>org.broadinstitute.gatk</groupId>
<artifactId>gatk-queue</artifactId> <artifactId>gatk-queue</artifactId>
<version>3.5</version> <version>3.6</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>org.broadinstitute.gatk</groupId> <groupId>org.broadinstitute.gatk</groupId>
...@@ -67,7 +67,7 @@ ...@@ -67,7 +67,7 @@
<dependency> <dependency>
<groupId>org.broadinstitute.gatk</groupId> <groupId>org.broadinstitute.gatk</groupId>
<artifactId>gatk-queue-extensions-public</artifactId> <artifactId>gatk-queue-extensions-public</artifactId>
<version>3.5</version> <version>3.6</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.scalatra.scalate</groupId> <groupId>org.scalatra.scalate</groupId>
......
...@@ -69,11 +69,15 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => ...@@ -69,11 +69,15 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
outputDir = outputDir.getAbsoluteFile outputDir = outputDir.getAbsoluteFile
init() init()
biopetScript() biopetScript()
logger.info("Biopet script done")
if (disableScatter) for (function <- functions) function match { if (disableScatter) {
logger.info("Disable scatters")
for (function <- functions) function match {
case f: ScatterGatherableFunction => f.scatterCount = 1 case f: ScatterGatherableFunction => f.scatterCount = 1
case _ => case _ =>
} }
}
this match { this match {
case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
...@@ -81,6 +85,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => ...@@ -81,6 +85,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
case _ => reportClass.foreach(add(_)) case _ => reportClass.foreach(add(_))
} }
logger.info("Running pre commands")
for (function <- functions) function match { for (function <- functions) function match {
case f: BiopetCommandLineFunction => case f: BiopetCommandLineFunction =>
f.preProcessExecutable() f.preProcessExecutable()
...@@ -94,7 +99,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => ...@@ -94,7 +99,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName)) globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, output directory cannot be created") else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, output directory cannot be created")
inputFiles.foreach { i => logger.info("Checking input files")
inputFiles.par.foreach { i =>
if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}") if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}") if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}") if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
...@@ -111,6 +117,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => ...@@ -111,6 +117,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json")) if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json"))
Logging.checkErrors() Logging.checkErrors()
logger.info("Script complete without errors")
} }
/** Get implemented from org.broadinstitute.gatk.queue.QScript */ /** Get implemented from org.broadinstitute.gatk.queue.QScript */
......
...@@ -24,10 +24,10 @@ import org.broadinstitute.gatk.queue.QScript ...@@ -24,10 +24,10 @@ import org.broadinstitute.gatk.queue.QScript
/** This trait creates a structured way of use multisample pipelines */ /** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript => trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
@Argument(doc = "Only Sample", shortName = "s", required = false, fullName = "sample") @Argument(doc = "Only Process This Sample", shortName = "s", required = false, fullName = "sample")
private[core] val onlySamples: List[String] = Nil private[core] val onlySamples: List[String] = Nil
require(globalConfig.map.contains("samples"), "No Samples found in config") if (!globalConfig.map.contains("samples")) Logging.addError("No Samples found in config")
/** Sample class with basic functions build in */ /** Sample class with basic functions build in */
abstract class AbstractSample(val sampleId: String) extends Summarizable { sample => abstract class AbstractSample(val sampleId: String) extends Summarizable { sample =>
...@@ -190,7 +190,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => ...@@ -190,7 +190,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
val samples: Map[String, Sample] = sampleIds.map(id => id -> makeSample(id)).toMap val samples: Map[String, Sample] = sampleIds.map(id => id -> makeSample(id)).toMap
/** Returns a list of all sampleIDs */ /** Returns a list of all sampleIDs */
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map.getOrElse("samples", Map())).keySet
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
protected lazy val nameError = "has an invalid name. " + protected lazy val nameError = "has an invalid name. " +
......
...@@ -155,7 +155,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu ...@@ -155,7 +155,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
super.beforeGraph() super.beforeGraph()
if (!cache && !database) { if (!cache && !database) {
Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor") Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor")
} else if (cache && dir.isEmpty) { } else if (cache && dir.isEmpty && dirCache.isEmpty) {
Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor") Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor")
} }
if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt") if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt")
......
...@@ -14,13 +14,16 @@ ...@@ -14,13 +14,16 @@
*/ */
package nl.lumc.sasc.biopet.extensions.bedtools package nl.lumc.sasc.biopet.extensions.bedtools
import java.io.File import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
import scala.io.Source
/** Extension for bedtools coverage */ /** Extension for bedtools coverage */
class BedtoolsCoverage(val root: Configurable) extends Bedtools { class BedtoolsCoverage(val root: Configurable) extends Bedtools with Reference {
@Input(doc = "Input file (bed/gff/vcf/bam)") @Input(doc = "Input file (bed/gff/vcf/bam)")
var input: File = null var input: File = null
...@@ -40,6 +43,9 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools { ...@@ -40,6 +43,9 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools {
@Argument(doc = "diffStrand", required = false) @Argument(doc = "diffStrand", required = false)
var diffStrand: Boolean = false var diffStrand: Boolean = false
@Argument(doc = "sorted", required = false)
var sorted: Boolean = config("sorted", default = false, freeVar = false)
override def defaultCoreMemory = 4.0 override def defaultCoreMemory = 4.0
/** Returns command to execute */ /** Returns command to execute */
...@@ -49,7 +55,10 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools { ...@@ -49,7 +55,10 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools {
conditional(depth, "-d") + conditional(depth, "-d") +
conditional(sameStrand, "-s") + conditional(sameStrand, "-s") +
conditional(diffStrand, "-S") + conditional(diffStrand, "-S") +
conditional(sorted, "-sorted") +
(if (sorted) required("-g", BedtoolsCoverage.getGenomeFile(referenceFai, jobTempDir)) else "") +
(if (outputAsStsout) "" else " > " + required(output)) (if (outputAsStsout) "" else " > " + required(output))
} }
object BedtoolsCoverage { object BedtoolsCoverage {
...@@ -65,4 +74,27 @@ object BedtoolsCoverage { ...@@ -65,4 +74,27 @@ object BedtoolsCoverage {
bedtoolsCoverage.diffStrand = diffStrand bedtoolsCoverage.diffStrand = diffStrand
bedtoolsCoverage bedtoolsCoverage
} }
private var genomeCache: Map[(File, File), File] = Map()
def getGenomeFile(fai: File, dir: File): File = {
if (!genomeCache.contains((fai, dir))) genomeCache += (fai, dir) -> createGenomeFile(fai, dir)
genomeCache((fai, dir))
}
/**
* Creates the genome file. i.e. the first two columns of the fasta index
* @return
*/
def createGenomeFile(fai: File, dir: File): File = {
val tmp = File.createTempFile(fai.getName, ".genome", dir)
tmp.deleteOnExit()
val writer = new PrintWriter(tmp)
Source.fromFile(fai).
getLines().
map(s => s.split("\t").take(2).mkString("\t")).
foreach(f => writer.println(f))
writer.close()
tmp
}
} }
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.bedtools
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Created by Sander Bollen on 26-5-16.
*/
class BedtoolsSort(val root: Configurable) extends Bedtools with Reference {
@Input
var input: File = null
@Output
var output: File = null
@Argument(required = false)
var faidx: File = referenceFai
def cmdLine = required(executable) + required("sort") + required("-i", input) +
optional("-faidx", faidx) +
(if (outputAsStsout) "" else " > " + required(output))
}
...@@ -16,17 +16,16 @@ package nl.lumc.sasc.biopet.extensions.gatk ...@@ -16,17 +16,16 @@ package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ } import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, Input }
//TODO: check gathering class BaseRecalibrator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with ScatterGatherableFunction */ {
def analysis_type = "BaseRecalibrator" def analysis_type = "BaseRecalibrator"
//TODO: check gathering scatterClass = classOf[ContigScatterFunction]
//scatterClass = classOf[ContigScatterFunction] setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
//setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
/** A database of known polymorphic sites */ /** A database of known polymorphic sites */
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites", required = false, exclusiveOf = "", validation = "") @Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites", required = false, exclusiveOf = "", validation = "")
...@@ -38,7 +37,7 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S ...@@ -38,7 +37,7 @@ class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with S
/** The output recalibration table file to create */ /** The output recalibration table file to create */
@Output(fullName = "out", shortName = "o", doc = "The output recalibration table file to create", required = true, exclusiveOf = "", validation = "") //TODO: check gathering @Output(fullName = "out", shortName = "o", doc = "The output recalibration table file to create", required = true, exclusiveOf = "", validation = "") //TODO: check gathering
//@Gather(classOf[org.broadinstitute.gatk.engine.recalibration.BQSRGatherer]) @Gather(classOf[org.broadinstitute.gatk.engine.recalibration.BQSRGatherer])
var out: File = _ var out: File = _
/** One or more covariates to be used in the recalibration. Can be specified multiple times */ /** One or more covariates to be used in the recalibration. Can be specified multiple times */
......
...@@ -58,6 +58,8 @@ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction ...@@ -58,6 +58,8 @@ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction
@Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
var log_to_file: File = _ var log_to_file: File = _
override def defaultCoreMemory = 4.0
override def beforeGraph() = { override def beforeGraph() = {
super.beforeGraph() super.beforeGraph()
if (reference == null) reference = referenceFasta() if (reference == null) reference = referenceFasta()
......
...@@ -90,7 +90,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt ...@@ -90,7 +90,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt
/** Mode for emitting reference confidence scores */ /** Mode for emitting reference confidence scores */
@Argument(fullName = "emitRefConfidence", shortName = "ERC", doc = "Mode for emitting reference confidence scores", required = false, exclusiveOf = "", validation = "") @Argument(fullName = "emitRefConfidence", shortName = "ERC", doc = "Mode for emitting reference confidence scores", required = false, exclusiveOf = "", validation = "")
var emitRefConfidence: String = _ var emitRefConfidence: Option[String] = config("emitRefConfidence")
/** File to which assembled haplotypes should be written */ /** File to which assembled haplotypes should be written */
@Output(fullName = "bamOutput", shortName = "bamout", doc = "File to which assembled haplotypes should be written", required = false, exclusiveOf = "", validation = "") @Output(fullName = "bamOutput", shortName = "bamout", doc = "File to which assembled haplotypes should be written", required = false, exclusiveOf = "", validation = "")
...@@ -522,12 +522,4 @@ object HaplotypeCaller { ...@@ -522,12 +522,4 @@ object HaplotypeCaller {
hc.out = outputFile hc.out = outputFile
hc hc
} }
def gvcf(root: Configurable, inputFile: File, outputFile: File): HaplotypeCaller = {
val hc = apply(root, List(inputFile), outputFile)
hc.emitRefConfidence = "GVCF"
hc.variant_index_type = Some("LINEAR")
hc.variant_index_parameter = Some(hc.config("variant_index_parameter", default = 128000).asInt)
hc
}
} }
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.hisat
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Extension for hisat2
*
* Based on version 2.0.4
*/
class Hisat2(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
// TODO: handle --sra-acc flag. This is currently unsupported by the wrapper.
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
@Input(doc = "Fastq file R2", shortName = "R2", required = false)
var R2: Option[File] = None
@Output(doc = "Output file SAM", shortName = "output", required = true)
var output: File = null
executable = config("exe", default = "hisat2", freeVar = false)
def versionRegex = """.*hisat2-align-s version (.*)""".r
override def versionExitcode = List(0, 1)
def versionCommand = executable + " --version"
override def defaultCoreMemory = 4.0
override def defaultThreads = 4
/* Required */
var hisat2Index: String = config("hisat2_index")
/* Input options */
var q: Boolean = config("q", default = false)
var qseq: Boolean = config("qseq", default = false)
var f: Boolean = config("f", default = false)
var r: Boolean = config("r", default = false)
var c: Boolean = config("c", default = false)
var skip: Option[Int] = config("skip")
var upto: Option[Int] = config("upto")
var trim5: Option[Int] = config("trim5")
var trim3: Option[Int] = config("trim3")
var phred33: Boolean = config("phred33", default = false)
var phred64: Boolean = config("phred64", default = false)
var intQuals: Boolean = config("int_quals", default = false)
/* Alignment options */
var nCeil: Option[String] = config("n_ceil")
var ignoreQuals: Boolean = config("ignore_quals", default = false)
var nofw: Boolean = config("nofw", default = false)
var norc: Boolean = config("norc", default = false)
/* Spliced alignment */
var penCansplice: Option[Int] = config("pen_cansplice")
var penNoncansplice: Option[Int] = config("pen_noncansplice")
var penCanintronlen: Option[Int] = config("pen_canintronlen")
var penNoncanintronlen: Option[Int] = config("pen_noncanintronlen")
var minIntronlen: Option[Int] = config("min_intronlen")
var maxIntronlen: Option[Int] = config("max_intronlen")
var knownSplicesiteInfile: Option[File] = config("known_splicesite_infile")
var novelSplicesiteOutfile: Option[File] = config("novel_splicesite_outfile")
var novelSplicesiteInfile: Option[File] = config("novel_splicesite_infile")
var noTempSplicesite: Boolean = config("no_temp_splicesite", default = false)
var noSplicedAlignment: Boolean = config("no_spliced_alignment", default = false)
var rnaStrandness: Option[String] = config("rna_strandness")
var tmo: Boolean = config("tmo", default = false)
var dta: Boolean = config("dta", default = false)
var dtaCufflinks: Boolean = config("dta_cufflinks", default = false)
/* Scoring */
var ma: Option[Int] = config("ma")
var mp: Option[String] = config("mp") // TODO: should be (Int, Int)
var sp: Option[String] = config("sp") // TODO: should be (Int, Int)
var np: Option[Int] = config("np")
var rdg: Option[String] = config("rdg") // TODO: should be (Int, Int)
var rfg: Option[String] = config("rfg") // TODO: should be (Int, Int)
var scoreMin: Option[String] = config("score_min")
/* Reporting */
var k: Option[Int] = config("k")
var all: Option[Int] = config("all")
/* Paired-end */
var fr: Boolean = config("fr", default = false)
var rf: Boolean = config("rf", default = false)
var ff: Boolean = config("ff", default = false)
var noMixed: Boolean = config("no_mixed", default = false)
var noDiscordant: Boolean = config("no_discordant", default = false)
/* Output */