Commit abd0b96b authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge pull request #72 from biopet/tarmac-testing

Tarmac testing
parents e00f888e 071c7c37
......@@ -111,10 +111,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy val father = {
val g: Option[String] = sampleTags.get("father").map(_.toString)
g.foreach { father =>
if (sampleId != father) Logging.addError(s"Father for $sampleId can not be itself")
if (samples.contains(father)) if (samples(father).gender != Gender.Male)
Logging.addError(s"Father of $sampleId is not a male")
else logger.warn(s"For sample '$sampleId' is father '$father' not found in config")
if (sampleId == father) Logging.addError(s"Father for $sampleId can not be itself")
if (samples.contains(father)) {
if (samples(father).gender != Gender.Male)
Logging.addError(s"Father of $sampleId is not a male")
} else logger.warn(s"For sample '$sampleId' is father '$father' not found in config")
}
g
}
......@@ -122,10 +123,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy val mother = {
val g: Option[String] = sampleTags.get("mother").map(_.toString)
g.foreach { mother =>
if (sampleId != mother) Logging.addError(s"mother for $sampleId can not be itself")
if (samples.contains(mother)) if (samples(mother).gender != Gender.Female)
Logging.addError(s"Mother of $sampleId is not a female")
else logger.warn(s"For sample '$sampleId' is mother '$mother' not found in config")
if (sampleId == mother) Logging.addError(s"mother for $sampleId can not be itself")
if (samples.contains(mother)) {
if (samples(mother).gender != Gender.Female)
Logging.addError(s"Mother of $sampleId is not a female")
} else logger.warn(s"For sample '$sampleId' is mother '$mother' not found in config")
}
g
}
......
......@@ -13,14 +13,14 @@ import scala.io.Source
*
* A multi-sample Qscript with additional Pedigree information.
* Pedigrees follow the PED standard.
* See: http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml for the format
* See: http://zzz.bwh.harvard.edu/plink/data.shtml#ped
*
* Pedigrees may be parsed from the sample config and/or a supplied PED file.
*/
trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
/* Optionally parse from ped file */
def ped: Option[File] = None
def ped: Option[File] = config("ped_file", default = None)
/* The merge strategy to use when we have both a ped file and sample tag information */
def mergeStrategy: PedMergeStrategy.Value = PedMergeStrategy.Concatenate
......@@ -28,14 +28,14 @@ trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
/**
* Case class representing a PED samples
* For the PED format, see:
* http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
* http://zzz.bwh.harvard.edu/plink/data.shtml#ped
* @param familyId family id
* @param individualId individual id
* @param paternalId Optional paternal id
* @param maternalId Optional maternal id
* @param gender gender
* @param affectedPhenotype Optional boolean
* @param genotypeFields optional genotype fileds
* @param genotypeFields optional genotype fields
*/
case class PedSample(familyId: String, individualId: String,
paternalId: Option[String],
......@@ -57,7 +57,7 @@ trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
* @return List[PedSample]
*/
def getIndexSamples: List[PedSample] = {
pedSamples.filter(x => x.affectedPhenotype)
pedSamples.filter(x => x.affectedPhenotype.contains(true))
}
/**
......@@ -186,8 +186,11 @@ trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
}
case _ => "0"
}
val line: String = s"${p.familyId}\t${p.individualId}\t$paternalField\t$maternalField\t$genderField\t$affectedField\t" +
p.genotypeFields.mkString("\t")
val mainLine: String = s"${p.familyId}\t${p.individualId}\t$paternalField\t$maternalField\t$genderField\t$affectedField"
val line = if (p.genotypeFields.nonEmpty) {
mainLine + "\t" + p.genotypeFields.mkString("\t")
} else mainLine
writer.write(line + "\n")
}
writer.close()
......
fam01 sample1 sample2 sample3 2 0
fam01 sample2 0 0 1 0
fam01 sample3 0 0 2 0
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
package nl.lumc.sasc.biopet.core
import java.io.File
import java.nio.file.Paths
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import org.broadinstitute.gatk.queue.{ QScript, QSettings }
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.language.reflectiveCalls
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
 * Tests for [[PedigreeQscript]]: pedigrees taken from sample tags in the config,
 * pedigrees parsed from a PED file, and both sources combined.
 *
 * Every test method declares `Unit` explicitly: TestNG ignores `@Test` methods that
 * return a value, so an inferred non-`Unit` result type would silently disable a test.
 *
 * Created by Sander Bollen on 11-4-17.
 */
class PedigreeQScriptTest extends TestNGSuite with Matchers {
  import PedigreeQScriptTest._

  /**
   * Reads a text file and returns its lines in sorted order.
   * The underlying source is closed even when reading fails.
   *
   * @param file file to read
   * @return all lines of the file, sorted
   */
  private def sortedLines(file: File): List[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList.sorted
    finally source.close()
  }

  /** Three samples given in the config must yield three PED samples with matching ids. */
  @Test
  def testConfigPedigree(): Unit = {
    val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: Nil)
    script.init()
    script.biopetScript()

    script.pedSamples.size shouldBe 3
    val ids = script.pedSamples.map(_.individualId)
    ids.contains("sample1") shouldBe true
    ids.contains("sample2") shouldBe true
    ids.contains("sample3") shouldBe true
  }

  /** The "gender" tag of a sample must end up as the gender of its PED sample. */
  @Test
  def testGenderCorrect(): Unit = {
    val script = PedigreeQScriptTest(sample2 :: Nil)
    script.init()
    script.biopetScript()

    script.pedSamples.size shouldBe 1
    script.pedSamples.head.gender shouldEqual Gender.Male

    val script2 = PedigreeQScriptTest(sample3 :: Nil)
    script2.init()
    script2.biopetScript()

    script2.pedSamples.size shouldBe 1
    script2.pedSamples.head.gender shouldEqual Gender.Female
  }

  /** A pipeline with exactly one sample must report itself as single. */
  @Test
  def testIsSingle(): Unit = {
    val script = PedigreeQScriptTest(sample2 :: Nil)
    script.init()
    script.biopetScript()

    script.isSingle shouldBe true
  }

  /** All three individuals of the trio PED file must be parsed. */
  @Test
  def testPedParsing(): Unit = {
    val script = PedigreeQScriptTest(trioPed :: Nil)
    script.init()
    script.biopetScript()

    script.pedSamples.size shouldBe 3
    val ids = script.pedSamples.map(_.individualId)
    ids.contains("sample4") shouldBe true
    ids.contains("sample5") shouldBe true
    ids.contains("sample6") shouldBe true
  }

  /** sample6 of the trio PED file must be recognised as a mother. */
  @Test
  def testIsMother(): Unit = {
    val script = PedigreeQScriptTest(trioPed :: Nil)
    script.init()
    script.biopetScript()

    // find + map: a missing sample shows up as a failed assertion (None), not as an exception
    script.pedSamples
      .find(_.individualId == "sample6")
      .map(s => script.isMother(s)) shouldBe Some(true)
  }

  /** sample5 of the trio PED file must be recognised as a father. */
  @Test
  def testIsFather(): Unit = {
    val script = PedigreeQScriptTest(trioPed :: Nil)
    script.init()
    script.biopetScript()

    script.pedSamples
      .find(_.individualId == "sample5")
      .map(s => script.isFather(s)) shouldBe Some(true)
  }

  /** The trio PED file is reported as a trio; the three config samples are not. */
  @Test
  def testIsTrio(): Unit = {
    val script = PedigreeQScriptTest(trioPed :: Nil)
    script.init()
    script.biopetScript()

    script.isTrio shouldBe true

    val script2 = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: Nil)
    script2.init()
    script2.biopetScript()

    script2.isTrio shouldBe false
  }

  /** Config samples and PED-file samples together must give six PED samples. */
  @Test
  def testConcatenation(): Unit = {
    val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: trioPed :: Nil)
    script.init()
    script.biopetScript()

    script.pedSamples.size shouldBe 6
    val ids = script.pedSamples.map(_.individualId)
    ids.contains("sample1") shouldBe true
    ids.contains("sample2") shouldBe true
    ids.contains("sample3") shouldBe true
    ids.contains("sample4") shouldBe true
    ids.contains("sample5") shouldBe true
    ids.contains("sample6") shouldBe true
  }

  /** The written PED file must contain the same lines as the `/full.ped` resource, in any order. */
  @Test
  def testWritePedFile(): Unit = {
    val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: trioPed :: Nil)
    script.init()
    script.biopetScript()

    val tmpFile = File.createTempFile("test", ".ped")
    tmpFile.deleteOnExit()
    script.writeToPedFile(tmpFile)

    val expectedLines = sortedLines(new File(resourcePath("/full.ped")))
    val writtenLines = sortedLines(tmpFile)
    writtenLines shouldEqual expectedLines
  }
}
/**
 * Fixtures for the pedigree tests: config fragments for individual samples, a config
 * fragment pointing at a PED file, and a factory for a minimal pedigree-aware pipeline.
 */
object PedigreeQScriptTest {

  /**
   * Resolves a classpath resource to a filesystem path.
   *
   * @param p resource name as accepted by `Class.getResource`
   * @return the path of the resource as a string
   * @throws IllegalArgumentException when no such resource exists
   */
  private def resourcePath(p: String): String = {
    // getResource returns null for a missing resource; fail with a readable
    // message instead of a bare NullPointerException.
    val url = Option(getClass.getResource(p)).getOrElse(
      throw new IllegalArgumentException(s"Test resource '$p' not found on the classpath"))
    Paths.get(url.toURI).toString
  }

  /** Config for sample1: female, family fam01, father sample2 and mother sample3. */
  val sample1 = Map("samples" ->
    Map("sample1" ->
      Map("tags" ->
        Map("gender" -> "female",
          "father" -> "sample2",
          "mother" -> "sample3",
          "family" -> "fam01"
        )
      )
    )
  )

  /** Config for sample2: male, family fam01. */
  val sample2 = Map("samples" ->
    Map("sample2" ->
      Map("tags" ->
        Map("gender" -> "male",
          "family" -> "fam01"
        )
      )
    )
  )

  /** Config for sample3: female, family fam01. */
  val sample3 = Map("samples" ->
    Map("sample3" ->
      Map("tags" ->
        Map("gender" -> "female",
          "family" -> "fam01"
        )
      )
    )
  )

  /** Config that sets "ped_file" to the `/trio.ped` test resource. */
  val trioPed = Map("ped_file" -> resourcePath("/trio.ped"))

  /**
   * Builds a minimal pipeline mixing in [[PedigreeQscript]].
   *
   * @param configs config fragments; they are merged left to right with
   *                `ConfigUtils.mergeMaps` and used as the global config
   * @param only    value assigned to `onlySamples`
   * @return the pipeline instance
   */
  def apply(configs: List[Map[String, Any]], only: List[String] = Nil) = {
    new QScript with PedigreeQscript { qscript =>

      qSettings = new QSettings()
      qSettings.runName = "test"

      override val onlySamples = only

      /** Collects one entry per sample and per library job added. */
      var buffer = new ListBuffer[String]()

      override def globalConfig = new Config(configs
        .foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) })

      val parent = null

      /** Message of the last entry in `Logging.errors`; throws when there are no errors. */
      def getLastLogMessage: String = {
        Logging.errors.toList.last.getMessage
      }

      class Sample(id: String) extends AbstractSample(id) {
        class Library(id: String) extends AbstractLibrary(id) {
          /** Adds the library jobs: records the "test" config value in the buffer. */
          protected def addJobs(): Unit = {
            buffer += config("test")
          }

          /** Must return files to store into summary */
          def summaryFiles: Map[String, File] = Map()

          /** Must return stats to store into summary */
          def summaryStats = Map()
        }

        /**
         * Factory method for Library class
         * @param id library id
         * @return Library instance
         */
        def makeLibrary(id: String): Library = new Library(id)

        /** Adds the sample jobs: records the sample id, adds library jobs and an Md5sum job. */
        protected def addJobs(): Unit = {
          buffer += s"$sampleId"
          addPerLibJobs()
          add(new Md5sum(qscript))
        }

        /** Must return files to store into summary */
        def summaryFiles: Map[String, File] = Map()

        /** Must return stats to store into summary */
        def summaryStats = Map()
      }

      /**
       * Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
       */
      def addMultiSampleJobs(): Unit = {
        add(new Md5sum(qscript))
      }

      /**
       * Factory method for Sample class
       * @param id SampleId
       * @return Sample class
       */
      def makeSample(id: String): Sample = new Sample(id)

      /** Must return a map with used settings for this pipeline */
      def summarySettings: Map[String, Any] = Map()

      /** Files to put in the summary for this pipeline */
      def summaryFiles: Map[String, File] = Map()

      /** Name of summary output file */
      def summaryFile: File = new File("./summary.json")

      /** Init for pipeline */
      def init(): Unit = {
      }

      /** Pipeline itself */
      def biopetScript(): Unit = {
        addSamplesJobs()
        addSummaryJobs()
      }
    }
  }
}
......@@ -47,17 +47,5 @@
<artifactId>BiopetToolsExtensions</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
......@@ -21,7 +21,7 @@ import scalaz.{ -\/, \/, \/- }
class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with SummaryQScript with Reference {
qscript =>
private val targets: File = config("targets")
lazy val targets: File = config("targets")
def this() = this(null)
/* Fixed values for xhmm count file */
......@@ -233,6 +233,9 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
/** Must return files to store into summary */
def summaryFiles: Map[String, File] = Map()
/** Libs do not exist for this pipeline */
override def libIds: Set[String] = Set()
/** Must returns stats to store into summary */
def summaryStats: Any = Map()
}
......
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
package nl.lumc.sasc.biopet.pipelines.tarmac
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
import nl.lumc.sasc.biopet.extensions.wisecondor.{ WisecondorCount, WisecondorNewRef }
import nl.lumc.sasc.biopet.extensions.xhmm.XhmmMergeGatkDepths
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
 * Tests for the [[Tarmac]] pipeline, driven by the in-memory configs of the companion object.
 *
 * Created by Sander Bollen on 13-4-17.
 */
class TarmacTest extends TestNGSuite with Matchers {
  import TarmacTest._

  /**
   * Builds a Tarmac pipeline for testing.
   *
   * @param map used as the global config of the pipeline
   * @return a pipeline with config namespace "tarmac" and run name "test"
   */
  def initPipeline(map: Map[String, Any]): Tarmac = {
    new Tarmac {
      qSettings = new QSettings
      qSettings.runName = "test"
      override def globalConfig = new Config(map)
      override def configNamespace = "tarmac"
    }
  }

  /** Number of functions added to `pipeline` that are instances of `T`. */
  private def jobCount[T](pipeline: Tarmac)(implicit tag: scala.reflect.ClassTag[T]): Int =
    pipeline.functions.count(job => tag.runtimeClass.isInstance(job))

  /** With only a tagged sample and no inputs, both count files of the sample must be a Left. */
  @Test
  def testSingleSampleLeft(): Unit = {
    val pipeline = initPipeline(sample2)
    pipeline.init()

    pipeline.samples.size shouldBe 1
    val (_, onlySample) = pipeline.samples.head
    onlySample.outputXhmmCountFile.isLeft shouldBe true
    onlySample.outputWisecondorCountFile.isLeft shouldBe true
  }

  /** BAM inputs must produce count jobs; ready-made count files must produce Ln jobs only. */
  @Test
  def testCountGeneration(): Unit = {
    val fromBam = initPipeline(ConfigUtils.mergeMaps(samplesWithBam, settings))
    fromBam.init()
    fromBam.addSamplesJobs()

    fromBam.samples.size shouldBe 7
    jobCount[WisecondorCount](fromBam) shouldBe 7
    jobCount[DepthOfCoverage](fromBam) shouldBe 7

    val fromCounts = initPipeline(ConfigUtils.mergeMaps(samplesWithCount, settings))
    fromCounts.init()
    fromCounts.addSamplesJobs()

    jobCount[WisecondorCount](fromCounts) shouldBe 0
    jobCount[DepthOfCoverage](fromCounts) shouldBe 0
    jobCount[Ln](fromCounts) shouldBe 14
  }

  /** Checks, per sample, which other samples are returned as its reference samples. */
  @Test
  def testReferenceSamples(): Unit = {
    val pipeline = initPipeline(ConfigUtils.mergeMaps(samplesWithBam, settings))
    pipeline.init()
    pipeline.biopetScript()

    // sample id -> expected reference samples, checked in this order
    val expectations = List(
      "sample1" -> List("sample4", "sample5"),
      "sample2" -> List("sample6", "sample7"),
      "sample3" -> List("sample1", "sample4", "sample5"),
      "sample4" -> List("sample1", "sample3", "sample5"),
      "sample5" -> List("sample1", "sample3", "sample4"),
      "sample6" -> List("sample2", "sample7"),
      "sample7" -> List("sample2", "sample6")
    )

    expectations.foreach {
      case (sampleId, expected) =>
        val found = pipeline.getReferenceSamplesForSample(sampleId).getOrElse(Nil).toList.sorted
        found shouldEqual expected.sorted
    }
  }

  /** Running the full script on seven BAM samples must add seven jobs of each reference type. */
  @Test
  def testReferenceJobs(): Unit = {
    val pipeline = initPipeline(ConfigUtils.mergeMaps(samplesWithBam, settings))
    pipeline.init()
    pipeline.biopetScript()

    jobCount[XhmmMergeGatkDepths](pipeline) shouldBe 7
    jobCount[WisecondorNewRef](pipeline) shouldBe 7
  }
}
/**
 * Config fixtures for the Tarmac tests: a single tagged sample, seven samples with
 * BAM inputs, the same seven samples with ready-made count files, and pipeline settings.
 */
object TarmacTest {

  /** Tag map of a sample that only has a gender and a family. */
  private def tagsOf(gender: String, family: String): Map[String, String] =
    Map("gender" -> gender, "family" -> family)

  /** Sample id paired with its tags, shared by `samplesWithBam` and `samplesWithCount`. */
  private val taggedSamples: List[(String, Map[String, String])] = List(
    "sample1" -> Map(
      "gender" -> "female",
      "father" -> "sample2",
      "mother" -> "sample3",
      "family" -> "fam01"
    ),
    "sample2" -> tagsOf("male", "fam01"),
    "sample3" -> tagsOf("female", "fam01"),
    "sample4" -> tagsOf("female", "fam02"),
    "sample5" -> tagsOf("female", "fam02"),
    "sample6" -> tagsOf("male", "fam02"),
    "sample7" -> tagsOf("male", "fam02")
  )

  /** Config with only sample2 (male, fam01) and no input files. */
  val sample2 = Map("samples" -> Map("sample2" -> Map("tags" -> tagsOf("male", "fam01"))))

  /** Config in which every sample has a "bam" entry named `<id>.bam`. */
  val samplesWithBam = Map(
    "samples" -> taggedSamples.map {
      case (id, tags) => id -> Map("tags" -> tags, "bam" -> s"$id.bam")
    }.toMap
  )

  /** Config in which every sample has count files named `<id>.xhmm` and `<id>.bed`. */
  val samplesWithCount = Map(
    "samples" -> taggedSamples.map {
      case (id, tags) =>
        id -> Map(
          "tags" -> tags,
          "xhmm_count_file" -> s"$id.xhmm",
          "wisecondor_count_file" -> s"$id.bed"
        )
    }.toMap
  )

  /** Pipeline settings that are merged with one of the sample configs in the tests. */
  val settings = Map(
    "gatk_jar" -> "gatk.jar",
    "discover_params" -> "discover_params",
    "tarmac" -> Map("targets" -> "targets.bed")
  )
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment