Commit 2015429b authored by Wai Yi Leung's avatar Wai Yi Leung

Merge branch 'feature-reference' into 'develop'

Feature reference

Part of issue #81

This only checks the reference and does not auto-generate anything yet. Adding auto-generation should no longer be much work.

See merge request !147
parents 8d7527db 8d604404
......@@ -5,10 +5,10 @@
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction }
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction with Reference {
memoryLimit = Option(3)
override def subPath = "gatk" :: super.subPath
......@@ -19,7 +19,11 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
if (config.contains("intervals")) intervals = config("intervals").asFileList
if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
reference_sequence = config("reference")
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree")
/** Runs the inherited pre-graph setup, then falls back to `referenceFasta()` when no reference sequence was set. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // Keep an explicitly assigned reference; only an unset (null) one is resolved from the config.
  reference_sequence = Option(reference_sequence).getOrElse(referenceFasta())
}
}
@HD VN:1.4 SO:unsorted
@SQ SN:chr1 LN:9 UR:file:/home/pjvan_thof/pipelines/biopet/public/mapping/src/test/resources/ref.fa M5:fe15dbbd0900310caf32827f6da57550
......@@ -5,6 +5,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.gatk
import java.io.{ FileOutputStream, File }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.config.Config
import nl.lumc.sasc.biopet.extensions.bwa.BwaMem
......@@ -90,10 +92,22 @@ class ShivaTest extends TestNGSuite with Matchers {
object ShivaTest {
val outputDir = Files.createTempDir()
/**
 * Copies the classpath resource `"/" + name` into `outputDir` under the same file name.
 *
 * Fails with an IllegalArgumentException when the resource is not on the classpath,
 * and closes both streams even when the copy throws.
 *
 * @param name resource name relative to the classpath root
 */
private def copyFile(name: String): Unit = {
  val is = getClass.getResourceAsStream("/" + name)
  // getResourceAsStream returns null for a missing resource; report that instead of an NPE in the copy.
  require(is != null, "Test resource not found on classpath: /" + name)
  try {
    val os = new FileOutputStream(new File(outputDir, name))
    try {
      org.apache.commons.io.IOUtils.copy(is, os)
    } finally {
      os.close()
    }
  } finally {
    is.close()
  }
}
copyFile("ref.fa")
copyFile("ref.dict")
copyFile("ref.fa.fai")
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
"reference" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
"samtools" -> Map("exe" -> "test"),
"bcftools" -> Map("exe" -> "test"),
......
......@@ -5,7 +5,7 @@
*/
package nl.lumc.sasc.biopet.pipelines.gatk
import java.io.File
import java.io.{ FileOutputStream, File }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.config.Config
......@@ -106,10 +106,22 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
object ShivaVariantcallingTest {
val outputDir = Files.createTempDir()
/**
 * Copies the classpath resource `"/" + name` into `outputDir` under the same file name.
 *
 * Fails with an IllegalArgumentException when the resource is not on the classpath,
 * and closes both streams even when the copy throws.
 *
 * @param name resource name relative to the classpath root
 */
private def copyFile(name: String): Unit = {
  val is = getClass.getResourceAsStream("/" + name)
  // getResourceAsStream returns null for a missing resource; report that instead of an NPE in the copy.
  require(is != null, "Test resource not found on classpath: /" + name)
  try {
    val os = new FileOutputStream(new File(outputDir, name))
    try {
      org.apache.commons.io.IOUtils.copy(is, os)
    } finally {
      os.close()
    }
  } finally {
    is.close()
  }
}
copyFile("ref.fa")
copyFile("ref.dict")
copyFile("ref.fa.fai")
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
"reference" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
"samtools" -> Map("exe" -> "test"),
"bcftools" -> Map("exe" -> "test"),
......
@HD VN:1.4 SO:unsorted
@SQ SN:chr1 LN:9 UR:file:/home/pjvan_thof/pipelines/biopet/public/mapping/src/test/resources/ref.fa M5:fe15dbbd0900310caf32827f6da57550
package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File
import java.io.{ FileOutputStream, File }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.config.Config
......@@ -81,9 +81,20 @@ class BamMetricsTest extends TestNGSuite with Matchers {
object BamMetricsTest {
val outputDir = Files.createTempDir()
/**
 * Copies the classpath resource `"/" + name` into `outputDir` under the same file name.
 *
 * Fails with an IllegalArgumentException when the resource is not on the classpath,
 * and closes both streams even when the copy throws.
 *
 * @param name resource name relative to the classpath root
 */
private def copyFile(name: String): Unit = {
  val is = getClass.getResourceAsStream("/" + name)
  // getResourceAsStream returns null for a missing resource; report that instead of an NPE in the copy.
  require(is != null, "Test resource not found on classpath: /" + name)
  try {
    val os = new FileOutputStream(new File(outputDir, name))
    try {
      org.apache.commons.io.IOUtils.copy(is, os)
    } finally {
      os.close()
    }
  } finally {
    is.close()
  }
}
copyFile("ref.fa")
copyFile("ref.dict")
copyFile("ref.fa.fai")
val executables = Map(
"refFlat" -> "bla.refFlat",
"reference" -> "reference.fa",
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"samtools" -> Map("exe" -> "test"),
"bedtools" -> Map("exe" -> "test")
)
......
package nl.lumc.sasc.biopet.core
import java.io.File
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.config.Configurable
import scala.collection.JavaConversions._
/**
 * Mixin that resolves the reference genome (species, name and fasta file) of a
 * [[Configurable]] from the config.
 *
 * Species and name are taken from `root` when `root` is itself a [[Reference]] with a
 * known value; otherwise they are read from the config. `subPath` is prefixed with
 * `referenceConfigPath`.
 *
 * Created by pjvan_thof on 4/6/15.
 */
trait Reference extends Configurable {

  /**
   * Species of the reference.
   *
   * Uses the value of `root` when that is a [[Reference]] reporting something other than
   * "unknown_species"; otherwise reads config key "species" (default "unknown_species").
   */
  def referenceSpecies: String = {
    root match {
      case r: Reference if r.referenceSpecies != "unknown_species" => r.referenceSpecies
      case _ => config("species", default = "unknown_species", path = super.configPath)
    }
  }

  /**
   * Name of the reference.
   *
   * Uses the value of `root` when that is a [[Reference]] reporting something other than
   * "unknown_ref"; otherwise reads config key "reference_name", defaulting to the "default"
   * entry under `references -> <species>` (itself defaulting to "unknown_ref").
   */
  def referenceName: String = {
    root match {
      case r: Reference if r.referenceName != "unknown_ref" => r.referenceName
      case _ =>
        val default: String = config("default", default = "unknown_ref", path = List("references", referenceSpecies))
        config("reference_name", default = default, path = super.configPath)
    }
  }

  /** Config sub path: the reference config path followed by the inherited sub path. */
  override def subPath: List[String] = referenceConfigPath ::: super.subPath

  /** Returns the reference config path */
  def referenceConfigPath: List[String] = List("references", referenceSpecies, referenceName)

  /** When true, `checkFasta` requires the `.fai` index to exist and be loadable. */
  protected def faiRequired: Boolean = false

  /** When true, `checkFasta` requires the `.dict` file to exist. */
  protected def dictRequired: Boolean = false

  /**
   * Returns the fasta file from config key "reference_fasta" after validating it with
   * `checkFasta`.
   *
   * When this object is a [[BiopetCommandLineFunctionTrait]], the dict and fai files are
   * appended to its `deps`, skipping entries already present so repeated calls do not
   * accumulate duplicates.
   */
  def referenceFasta(): File = {
    val file: File = config("reference_fasta")
    checkFasta(file)
    val dict = Reference.dictFile(file)
    val fai = Reference.faiFile(file)
    this match {
      case c: BiopetCommandLineFunctionTrait =>
        c.deps :::= (dict :: fai :: Nil).filterNot(f => c.deps.contains(f))
      case _ =>
    }
    file
  }

  /**
   * Create summary part for reference.
   *
   * @return map with "contigs" (sequence name -> md5 and length), "species" and "name"
   */
  def referenceSummary: Map[String, Any] = {
    import scala.collection.JavaConverters._

    val file = new IndexedFastaSequenceFile(referenceFasta())
    try {
      // `toMap` materialises the contigs before the fasta reader is closed.
      val contigs = file.getSequenceDictionary.getSequences.asScala.map { seq =>
        val md5 = Option(seq.getAttribute("M5"))
        seq.getSequenceName -> Map("md5" -> md5, "length" -> seq.getSequenceLength)
      }.toMap
      Map(
        "contigs" -> contigs,
        "species" -> referenceSpecies,
        "name" -> referenceName
      )
    } finally {
      file.close()
    }
  }

  //TODO: this becomes obsolete once the index files are auto-generated
  /**
   * Checks that the fasta file exists and, depending on `dictRequired` / `faiRequired`,
   * that its dict and index files are present.
   *
   * Results are cached per (file, dictRequired, faiRequired) so that a lenient check made
   * by one object does not suppress the stricter check of another.
   *
   * @param file fasta file to validate
   * @throws IllegalArgumentException when a required file is missing or the index cannot be loaded
   */
  def checkFasta(file: File): Unit = {
    val key = (file, dictRequired, faiRequired)
    if (!Reference.checked.contains(key)) {
      require(file.exists(), "Reference not found: " + file)

      if (dictRequired) {
        require(Reference.dictFile(file).exists(), "Reference is missing a dict file")
      }

      if (faiRequired) {
        require(Reference.faiFile(file).exists(), "Reference is missing a fai file")
        require(IndexedFastaSequenceFile.canCreateIndexedFastaReader(file), "Index of reference cannot be loaded, reference: " + file)
      }

      Reference.checked += key
    }
  }
}

object Reference {

  /** Used as cache to avoid double checking; keyed on the file and the strictness of the check. */
  private var checked: Set[(File, Boolean, Boolean)] = Set()

  /** Dict file for a fasta: the absolute path with a trailing ".fa" / ".fasta" replaced by ".dict". */
  private def dictFile(fasta: File): File =
    new File(fasta.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta") + ".dict")

  /** Index file for a fasta: the absolute path with ".fai" appended. */
  private def faiFile(fasta: File): File = new File(fasta.getAbsolutePath + ".fai")
}
......@@ -22,11 +22,11 @@ trait Configurable extends ImplicitConversions {
val root: Configurable
def globalConfig: Config = if (root != null) root.globalConfig else Config.global
/** subfix to the path */
/** suffix to the path */
def subPath: List[String] = Nil
/** Get default path to search config values for current object */
def configPath: List[String] = if (root != null) root.configFullPath ::: subPath else subPath
def configPath: List[String] = if (root != null) root.configFullPath else Nil
/** Gets name of module for config */
protected[core] def configName = getClass.getSimpleName.toLowerCase
......@@ -92,7 +92,7 @@ trait Configurable extends ImplicitConversions {
val s = if (sample != null || defaultSample.isEmpty) sample else defaultSample.get
val l = if (library != null || defaultLibrary.isEmpty) library else defaultLibrary.get
val m = if (submodule != null) submodule else configName
val p = if (path == null) getConfigPath(s, l, submodule) else path
val p = (if (path == null) getConfigPath(s, l, submodule) ::: subPath else path)
val d = {
val value = Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar))
if (value.isDefined) value.get.value else default
......@@ -119,7 +119,7 @@ trait Configurable extends ImplicitConversions {
val s = if (sample != null || defaultSample.isEmpty) sample else defaultSample.get
val l = if (library != null || defaultLibrary.isEmpty) library else defaultLibrary.get
val m = if (submodule != null) submodule else configName
val p = if (path == null) getConfigPath(s, l, submodule) else path
val p = (if (path == null) getConfigPath(s, l, submodule) ::: subPath else path)
globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None)
}
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -26,7 +26,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*
* Based on version 1.1.1
*/
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Reference {
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
......@@ -34,7 +34,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
var R2: Option[File] = None
@Input(doc = "The reference file for the bam files.", shortName = "R", required = true)
var reference: File = config("reference")
var reference: File = null
@Output(doc = "Output file SAM", shortName = "output", required = true)
var output: File = null
......@@ -59,6 +59,11 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
var maqerr: Option[Int] = config("maqerr")
var maxins: Option[Int] = config("maxins")
/** Runs the inherited pre-graph setup, then resolves `reference` via `referenceFasta()` when it is still unset. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
/** return commandline to execute */
def cmdLine = {
required(executable) +
......
......@@ -19,14 +19,14 @@ import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Wrapper for the gsnap command line tool
* Written based on gsnap version 2014-05-15
*/
class Gsnap(val root: Configurable) extends BiopetCommandLineFunction {
class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Reference {
/** default executable */
executable = config("exe", default = "gsnap", freeVar = false)
......
......@@ -17,12 +17,12 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for stampy */
class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
class Stampy(val root: Configurable) extends BiopetCommandLineFunction with Reference {
@Input(doc = "FastQ file R1", shortName = "R1")
var R1: File = _
......@@ -30,7 +30,7 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
var R2: File = _
@Input(doc = "The reference file for the bam files.", shortName = "ref")
var reference: File = config("reference")
var reference: File = null
@Input(doc = "The genome prefix.")
var genome: File = config("genome")
......@@ -73,6 +73,7 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
/**
 * Runs the inherited pre-graph setup, requires a readgroup to be set and resolves
 * `reference` via `referenceFasta()` when it is still unset.
 */
override def beforeGraph: Unit = {
  super.beforeGraph
  require(readgroup != null)
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
/** Returns command to execute */
......
......@@ -17,16 +17,16 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Extension for STAR
*/
class Star(val root: Configurable) extends BiopetCommandLineFunction {
class Star(val root: Configurable) extends BiopetCommandLineFunction with Reference {
@Input(doc = "The reference file for the bam files.", required = false)
var reference: File = new File(config("reference"))
var reference: File = null
@Input(doc = "Fastq file R1", required = false)
var R1: File = _
......@@ -57,7 +57,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Output Directory")
var outputDir: File = _
var genomeDir: File = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star"))
var genomeDir: File = null
var runmode: String = _
var sjdbOverhang: Int = _
var outFileNamePrefix: String = _
......@@ -68,6 +68,9 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction {
/** Sets output files for the graph */
override def beforeGraph() {
super.beforeGraph
if (reference == null) reference = referenceFasta()
genomeDir = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star"))
if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "."
val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir
if (runmode == null) {
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
......@@ -25,7 +25,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
* Extension for VariantEffectPredictor
* Created by ahbbollen on 15-1-15.
*/
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction {
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference {
executable = config("exe", submodule = "perl", default = "perl")
var vep_script: String = config("vep_script")
......
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.bwa
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
......@@ -27,12 +28,12 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
*
* Created by pjvan_thof on 1/16/15.
*/
class BwaAln(val root: Configurable) extends Bwa {
class BwaAln(val root: Configurable) extends Bwa with Reference {
@Input(doc = "Fastq file", required = true)
var fastq: File = _
@Input(doc = "The reference file for the bam files.", required = true)
var reference: File = config("reference")
var reference: File = null
@Output(doc = "Output file SAM", required = false)
var output: File = _
......@@ -63,6 +64,11 @@ class BwaAln(val root: Configurable) extends Bwa {
override val defaultCoreMemory = 4.0
override val defaultThreads = 8
/** Runs the inherited pre-graph setup, then resolves `reference` via `referenceFasta()` when it is still unset. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
/** Returns command to execute */
def cmdLine = required(executable) +
required("aln") +
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions.bwa
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -27,7 +27,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*
* Based on version 0.7.12-r1039
*/
class BwaMem(val root: Configurable) extends Bwa {
class BwaMem(val root: Configurable) extends Bwa with Reference {
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = _
......@@ -35,7 +35,7 @@ class BwaMem(val root: Configurable) extends Bwa {
var R2: File = _
@Input(doc = "The reference file for the bam files.", shortName = "R")
var reference: File = config("reference")
var reference: File = null
@Output(doc = "Output file SAM", shortName = "output")
var output: File = _
......@@ -72,6 +72,11 @@ class BwaMem(val root: Configurable) extends Bwa {
override val defaultCoreMemory = 4.0
override val defaultThreads = 8
/** Runs the inherited pre-graph setup, then resolves `reference` via `referenceFasta()` when it is still unset. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
def cmdLine = {
required(executable) +
required("mem") +
......
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.bwa
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
......@@ -26,7 +27,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
* based on executable version 0.7.10-r789
*
*/
class BwaSampe(val root: Configurable) extends Bwa {
class BwaSampe(val root: Configurable) extends Bwa with Reference {
@Input(doc = "Fastq file R1", required = true)
var fastqR1: File = _
......@@ -40,7 +41,7 @@ class BwaSampe(val root: Configurable) extends Bwa {
var saiR2: File = _
@Input(doc = "The reference file for the bam files.", required = true)
var reference: File = config("reference")
var reference: File = null
@Output(doc = "Output file SAM", required = false)
var output: File = _
......@@ -56,6 +57,11 @@ class BwaSampe(val root: Configurable) extends Bwa {
var r: String = _
/** Runs the inherited pre-graph setup, then resolves `reference` via `referenceFasta()` when it is still unset. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
def cmdLine = required(executable) +
required("sampe") +
optional("-a", a) +
......
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.extensions.bwa
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
......@@ -26,7 +27,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
* based on executable version 0.7.10-r789
*
*/
class BwaSamse(val root: Configurable) extends Bwa {
class BwaSamse(val root: Configurable) extends Bwa with Reference {
@Input(doc = "Fastq file", required = true)
var fastq: File = _
......@@ -34,7 +35,7 @@ class BwaSamse(val root: Configurable) extends Bwa {
var sai: File = _
@Input(doc = "The reference file for the bam files.", required = true)
var reference: File = config("reference")
var reference: File = null
@Output(doc = "Output file SAM", required = false)
var output: File = _
......@@ -42,6 +43,11 @@ class BwaSamse(val root: Configurable) extends Bwa {
var n: Option[Int] = config("n")
var r: String = _
/** Runs the inherited pre-graph setup, then resolves `reference` via `referenceFasta()` when it is still unset. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // An explicitly assigned reference wins; only a null one is looked up.
  if (Option(reference).isEmpty) {
    reference = referenceFasta()
  }
}
/** Returns command to execute */
def cmdLine = required(executable) +
required("samse") +
......
......@@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
* Created by pjvan_thof on 2/26/15.
*/
abstract class Gatk extends BiopetJavaCommandLineFunction {
abstract class Gatk extends BiopetJavaCommandLineFunction with Reference {
override def subPath = "gatk" :: super.subPath
jarFile = config("gatk_jar")
......@@ -34,7 +34,7 @@ abstract class Gatk extends BiopetJavaCommandLineFunction {
override val defaultCoreMemory = 3.0
@Input(required = true)
var reference: File = config("reference")
var reference: File = null
@Input(required = false)
var gatkKey: Option[File] = config("gatk_key")
......@@ -48,6 +48,13 @@ abstract class Gatk extends BiopetJavaCommandLineFunction {
@Input(required = false)
var pedigree: List[File] = config("pedigree", default = Nil)
override def dictRequired = true
/** Runs the inherited pre-graph setup, then falls back to `referenceFasta()` when no reference was set. */
override def beforeGraph: Unit = {
  super.beforeGraph
  // Keep an explicitly assigned reference; only an unset (null) one is resolved from the config.
  reference = Option(reference).getOrElse(referenceFasta())
}
override def commandLine = super.commandLine +
required("-T", analysisType) +
required("-R", reference) +
......
......@@ -2,20 +2,21 @@ package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/**
* Created by pjvan_thof on 4/15/15.
*/
class BedToIntervalList(val root: Configurable) extends Picard {
class BedToIntervalList(val root: Configurable) extends Picard with Reference {
javaMainClass = new picard.util.BedToIntervalList().getClass.getName
@Input(doc = "Input bed file", required = true)
var input: File = null
@Input(doc = "Reference dict file", required = true)
var dict: File = new File(config("reference").asString.stripSuffix(".fa").stripSuffix(".fasta") + ".dict")
var dict: File = new File(referenceFasta().toString.stripSuffix(".fa").stripSuffix(".fasta") + ".dict")
@Output(doc = "Output interval list", required = true)
var output: File = null
......
......@@ -16,12 +16,13 @@
package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.config.Configurable