Commit 0beb17fd authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'feature-sample_tags' into 'develop'

Added gender and groups to samples

Fix for #131 

See merge request !296
parents 488594e1 5edadca1
......@@ -17,10 +17,10 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.Argument
/** This trait provides a structured way of using multi-sample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
......@@ -31,7 +31,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
require(globalConfig.map.contains("samples"), "No Samples found in config")
/** Sample class with basic functions built in */
abstract class AbstractSample(val sampleId: String) extends Summarizable {
abstract class AbstractSample(val sampleId: String) extends Summarizable { sample =>
/** Overrules config of qscript with default sample */
val config = new ConfigFunctions(defaultSample = sampleId)
......@@ -39,7 +39,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
def summarySettings: Map[String, Any] = Map()
/** Library class with basic functions built in */
abstract class AbstractLibrary(val libId: String) extends Summarizable {
abstract class AbstractLibrary(val libId: String) extends Summarizable { lib =>
/** Overrules config of qscript with default sample and default library */
val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId)
......@@ -64,11 +64,22 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
}
/** Creates a library file with given suffix */
def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix)
def createFile(suffix: String): File = new File(libDir, s"$sampleId-$libId.$suffix")
/** Directory of this library: a "lib_" + libId folder inside the sample directory. */
def libDir = new File(sampleDir, s"lib_$libId")
/**
 * Tags of this library, looked up lazily under the config key "tags" with
 * submodule = libId and path samples -> sampleId -> libraries.
 * An empty map is passed as the default value.
 * NOTE(review): the exact effect of `freeVar = false` is defined by `config` elsewhere — confirm.
 */
lazy val libTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = libId, path = List("samples", sampleId, "libraries"))
/** Id of the sample this library belongs to; forwards to the enclosing sample through the `sample` alias. */
def sampleId = sample.sampleId
/**
 * Groups of this library, taken from the "groups" entry of [[libTags]].
 *
 * A list value is converted element by element with `toString`, a single string
 * becomes a one-element list, and anything else (including a missing entry) gives Nil.
 */
lazy val libGroups: List[String] =
  libTags.get("groups").collect {
    case many: List[_]  => many.map(entry => entry.toString)
    case single: String => single :: Nil
  }.getOrElse(Nil)
/** Function that adds library jobs */
protected def addJobs()
}
......@@ -79,6 +90,49 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
/** All libraries of this sample, keyed by library id and built with [[makeLibrary]]. */
val libraries: Map[String, Library] = libIds.map(name => (name, makeLibrary(name))).toMap
/**
 * Tags of this sample, looked up lazily under the config key "tags" with
 * submodule = sampleId and path List("samples").
 * An empty map is passed as the default value.
 * NOTE(review): the exact effect of `freeVar = false` is defined by `config` elsewhere — confirm.
 */
lazy val sampleTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = sampleId, path = List("samples"))
/**
 * Gender of this sample, derived from the "gender" entry of [[sampleTags]].
 *
 * The tag is compared case-insensitively: "male" gives [[Gender.Male]] and "female"
 * gives [[Gender.Female]]. A missing tag gives [[Gender.Unknown]] silently; any other
 * value gives [[Gender.Unknown]] and logs a warning that contains the offending value.
 */
lazy val gender: Gender.Value =
  sampleTags.get("gender").map(_.toString) match {
    case Some(tag) =>
      tag.toLowerCase match {
        case "male"   => Gender.Male
        case "female" => Gender.Female
        case _ =>
          // Report the raw tag value itself, not the Option wrapper around it.
          logger.warn(s"Could not convert '$tag' to a gender")
          Gender.Unknown
      }
    case None => Gender.Unknown
  }
/**
 * Id of the father of this sample, read from the "father" entry of [[sampleTags]].
 *
 * While the tag is resolved, two problems are recorded through `Logging.addError`:
 * the sample naming itself as father, and a father sample whose gender is not
 * [[Gender.Male]]. A father id that is not present in `samples` only logs a warning.
 *
 * @return the father's sample id when the tag is set, otherwise None
 */
lazy val father = {
  val fatherId: Option[String] = sampleTags.get("father").map(_.toString)
  fatherId.foreach { father =>
    // A sample can never be its own parent.
    if (sampleId == father) Logging.addError(s"Father for $sampleId can not be itself")
    // Braces keep the `else` attached to the existence check, not to the gender check.
    if (samples.contains(father)) {
      if (samples(father).gender != Gender.Male)
        Logging.addError(s"Father of $sampleId is not a male")
    } else logger.warn(s"For sample '$sampleId' is father '$father' not found in config")
  }
  fatherId
}
/**
 * Id of the mother of this sample, read from the "mother" entry of [[sampleTags]].
 *
 * While the tag is resolved, two problems are recorded through `Logging.addError`:
 * the sample naming itself as mother, and a mother sample whose gender is not
 * [[Gender.Female]]. A mother id that is not present in `samples` only logs a warning.
 *
 * @return the mother's sample id when the tag is set, otherwise None
 */
lazy val mother = {
  val motherId: Option[String] = sampleTags.get("mother").map(_.toString)
  motherId.foreach { mother =>
    // A sample can never be its own parent.
    if (sampleId == mother) Logging.addError(s"mother for $sampleId can not be itself")
    // Braces keep the `else` attached to the existence check, not to the gender check.
    if (samples.contains(mother)) {
      if (samples(mother).gender != Gender.Female)
        Logging.addError(s"Mother of $sampleId is not a female")
    } else logger.warn(s"For sample '$sampleId' is mother '$mother' not found in config")
  }
  motherId
}
/**
 * Groups of this sample, taken from the "groups" entry of [[sampleTags]].
 *
 * A list value is converted element by element with `toString`, a single string
 * becomes a one-element list, and anything else (including a missing entry) gives Nil.
 */
lazy val sampleGroups: List[String] =
  sampleTags.get("groups").collect {
    case many: List[_]  => many.map(entry => entry.toString)
    case single: String => single :: Nil
  }.getOrElse(Nil)
/**
* Factory method for Library class
* @param id SampleId
......@@ -117,7 +171,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
}
/** Creates a sample file with given suffix */
def createFile(suffix: String) = new File(sampleDir, sampleId + suffix)
def createFile(suffix: String) = new File(sampleDir, s"$sampleId.$suffix")
/** Directory of this sample: outputDir / "samples" / sampleId. */
def sampleDir = new File(outputDir, s"samples${File.separator}$sampleId")
......@@ -180,3 +234,10 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
sample ::: lib ::: super.configFullPath
}
}
/** Companion object holding types shared by multi-sample pipelines. */
object MultiSampleQScript {

  /** Possible genders of a sample. */
  object Gender extends Enumeration {
    val Male = Value
    val Female = Value
    val Unknown = Value
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QScript
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.collection.mutable.ListBuffer
/**
* Created by pjvan_thof on 12/29/15.
*/
/**
 * TestNG suite for the sample and library handling of [[MultiSampleQScript]].
 *
 * Every test builds a small pipeline from the fixtures in the companion object,
 * runs `init()` and `biopetScript()`, and then inspects the generated functions,
 * samples and libraries.
 */
class MultiSampleQScriptTest extends TestNGSuite with Matchers {
  import MultiSampleQScriptTest._

  /** Samples without usable tags: checks defaults, directories and file naming. */
  @Test
  def testDefault: Unit = {
    val pipeline = MultiSampleQScriptTest(List(sample1, sample2, sample3))
    pipeline.outputDir = new File("./").getAbsoluteFile
    pipeline.init()
    pipeline.biopetScript()

    pipeline.functions.size shouldBe 5

    for ((sampleId, sample) <- pipeline.samples) {
      sample.gender shouldBe Gender.Unknown
      sample.father shouldBe None
      sample.mother shouldBe None
      sample.summaryFiles shouldBe Map()
      sample.summaryStats shouldBe Map()
      sample.summarySettings shouldBe Map()
      sample.sampleDir shouldBe new File(pipeline.outputDir, s"samples${File.separator}$sampleId")
      sample.createFile("bla.txt") shouldBe new File(sample.sampleDir, s"$sampleId.bla.txt")

      for ((libId, library) <- sample.libraries) {
        library.libDir shouldBe new File(sample.sampleDir, s"lib_$libId")
        library.createFile("bla.txt") shouldBe new File(library.libDir, s"$sampleId-$libId.bla.txt")
        library.summaryFiles shouldBe Map()
        library.summaryStats shouldBe Map()
        library.summarySettings shouldBe Map()
      }
    }
  }

  /** A child with both parents: checks gender parsing and the father/mother tags. */
  @Test
  def testTrio: Unit = {
    val pipeline = MultiSampleQScriptTest(List(child, father, mother))
    pipeline.init()
    pipeline.biopetScript()

    pipeline.functions.size shouldBe 5

    pipeline.samples("child").gender shouldBe Gender.Male
    pipeline.samples("father").gender shouldBe Gender.Male
    pipeline.samples("mother").gender shouldBe Gender.Female
    pipeline.samples("child").father shouldBe Some("father")
    pipeline.samples("child").mother shouldBe Some("mother")
  }

  /** Checks the "groups" tag on sample level and on library level. */
  @Test
  def testGroups: Unit = {
    val pipeline = MultiSampleQScriptTest(List(sample1, sample2, sample3))
    pipeline.init()
    pipeline.biopetScript()

    pipeline.functions.size shouldBe 5

    pipeline.samples("sample1").sampleGroups shouldBe List("1")
    pipeline.samples("sample1").libraries("lib1").libGroups should not be List("1")
    pipeline.samples("sample2").sampleGroups shouldBe List("2")
    pipeline.samples("sample2").libraries("lib1").libGroups shouldBe List("3")
    pipeline.samples("sample3").sampleGroups shouldBe Nil
  }

  /** Restricting the run to one sample must leave a single function. */
  @Test
  def testOnlySamples: Unit = {
    val pipeline = MultiSampleQScriptTest(List(sample1, sample2, sample3), List("sample1"))
    pipeline.init()
    pipeline.biopetScript()

    pipeline.functions.size shouldBe 1
  }
}
/** Config fixtures and a pipeline factory used by the MultiSampleQScriptTest suite. */
object MultiSampleQScriptTest {

  /** One library, an unparsable gender tag and sample group "1". */
  val sample1 = Map("samples" -> Map("sample1" -> Map(
    "tags" -> Map(
      "gender" -> "blablablablabla",
      "groups" -> List("1")
    ),
    "libraries" -> Map(
      "lib1" -> Map("test" -> "1-1")
    )))
  )

  /** Two libraries; sample group "2" and group "3" on library lib1. */
  val sample2 = Map("samples" -> Map("sample2" -> Map(
    "tags" -> Map(
      "groups" -> List("2")
    ),
    "libraries" -> Map(
      "lib1" -> Map("test" -> "2-1", "tags" -> Map(
        "groups" -> List("3")
      )),
      "lib2" -> Map("test" -> "2-2")
    ))))

  /** Three libraries and no tags at all. */
  val sample3 = Map("samples" -> Map("sample3" -> Map("libraries" -> Map(
    "lib1" -> Map("test" -> "3-1"),
    "lib2" -> Map("test" -> "3-2"),
    "lib3" -> Map("test" -> "3-3")
  ))))

  /** Male child that names "father" and "mother" as its parents. */
  val child = Map("samples" -> Map("child" -> Map("tags" -> Map(
    "gender" -> "male", "father" -> "father", "mother" -> "mother"))))

  /** Male parent sample without libraries. */
  val father = Map("samples" -> Map("father" -> Map("tags" -> Map("gender" -> "male"))))

  /** Female parent sample without libraries. */
  val mother = Map("samples" -> Map("mother" -> Map("tags" -> Map("gender" -> "female"))))

  /**
   * Builds a minimal multi-sample pipeline for testing.
   *
   * @param configs config maps that are merged, in order, into the global config
   * @param only value used to override `onlySamples` of the pipeline
   * @return an anonymous QScript mixing in MultiSampleQScript
   */
  def apply(configs: List[Map[String, Any]], only: List[String] = Nil) = {
    new QScript with MultiSampleQScript { qscript =>

      override val onlySamples = only

      /** Collects the values seen while sample and library jobs are added. */
      var buffer = new ListBuffer[String]()

      /** Global config: all given config maps merged into one. */
      override def globalConfig = new Config(
        configs.foldLeft(Map.empty[String, Any])((merged, next) => ConfigUtils.mergeMaps(merged, next)))

      // NOTE(review): presumably required by a base trait of the pipeline — confirm.
      val root = null

      /** Sample that records its id and adds one Md5sum job. */
      class Sample(id: String) extends AbstractSample(id) {

        /** Library that records its "test" config value. */
        class Library(id: String) extends AbstractLibrary(id) {

          /** Adds the library jobs: only stores the "test" config value in the buffer. */
          protected def addJobs(): Unit = {
            buffer += config("test")
          }

          /** No files are stored in the summary for a library. */
          def summaryFiles: Map[String, File] = Map()

          /** No stats are stored in the summary for a library. */
          def summaryStats = Map()
        }

        /**
         * Factory method for the Library class.
         * @param id library id
         * @return new Library instance
         */
        def makeLibrary(id: String): Library = new Library(id)

        /** Adds the sample jobs: records the sample id, adds library jobs, then one Md5sum. */
        protected def addJobs(): Unit = {
          buffer += sampleId
          addPerLibJobs()
          add(new Md5sum(qscript))
        }

        /** No files are stored in the summary for a sample. */
        def summaryFiles: Map[String, File] = Map()

        /** No stats are stored in the summary for a sample. */
        def summaryStats = Map()
      }

      /**
       * Adds the multi-sample jobs; this is executed only when the -sample argument is not given.
       */
      def addMultiSampleJobs(): Unit = {
        add(new Md5sum(qscript))
      }

      /**
       * Factory method for the Sample class.
       * @param id sample id
       * @return new Sample instance
       */
      def makeSample(id: String): Sample = new Sample(id)

      /** Settings used by this pipeline; none for the test pipeline. */
      def summarySettings: Map[String, Any] = Map()

      /** Files to put in the summary for this pipeline; none for the test pipeline. */
      def summaryFiles: Map[String, File] = Map()

      /** Name of the summary output file; not set in the test pipeline. */
      def summaryFile: File = null

      /** Init for the pipeline; nothing to prepare here. */
      def init(): Unit = {
      }

      /** The pipeline itself: sample jobs first, then the summary jobs. */
      def biopetScript(): Unit = {
        addSamplesJobs()
        addSummaryJobs()
      }
    }
  }
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment