Commit e8609425 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge branch 'develop' into fix-BIOPET-736-peter

parents 1de0aefe 050ed7a9
......@@ -90,4 +90,4 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo
super.freezeFieldValues()
commands.foreach(_.qSettings = qSettings)
}
}
}
\ No newline at end of file
......@@ -124,11 +124,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy val father = {
val g: Option[String] = sampleTags.get("father").map(_.toString)
g.foreach { father =>
if (sampleId != father) Logging.addError(s"Father for $sampleId can not be itself")
if (samples.contains(father))
if (samples(father).gender == Gender.Male)
Logging.addError(s"Father of $sampleId is not a female")
else logger.warn(s"For sample '$sampleId' is father '$father' not found in config")
if (sampleId == father) Logging.addError(s"Father for $sampleId can not be itself")
if (samples.contains(father)) {
if (samples(father).gender != Gender.Male)
Logging.addError(s"Father of $sampleId is not a male")
} else logger.warn(s"For sample '$sampleId', its father '$father' not found in config")
}
g
}
......@@ -136,15 +136,19 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
lazy val mother = {
val g: Option[String] = sampleTags.get("mother").map(_.toString)
g.foreach { mother =>
if (sampleId != mother) Logging.addError(s"mother for $sampleId can not be itself")
if (samples.contains(mother))
if (samples(mother).gender == Gender.Female)
if (sampleId == mother) Logging.addError(s"mother for $sampleId can not be itself")
if (samples.contains(mother)) {
if (samples(mother).gender != Gender.Female)
Logging.addError(s"Mother of $sampleId is not a female")
else logger.warn(s"For sample '$sampleId' is mother '$mother' not found in config")
} else logger.warn(s"For sample '$sampleId', its mother '$mother' not found in config")
}
g
}
lazy val family: Option[String] = {
sampleTags.get("family").map(_.toString)
}
lazy val sampleGroups: List[String] = sampleTags.get("groups") match {
case Some(g: List[_]) => g.map(_.toString)
case Some(g: String) => List(g)
......@@ -312,4 +316,4 @@ object MultiSampleQScript {
val Male, Female, Unknown = Value
}
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.core
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.PedigreeQscript.PedMergeStrategy
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.QScript
import scala.io.Source
/**
* Created by Sander Bollen on 28-3-17.
*
* A multi-sample Qscript with additional Pedigree information.
* Pedigrees follow the PED standard.
* See: http://zzz.bwh.harvard.edu/plink/data.shtml#ped
*
* Pedigrees may be parsed from the sample config and/or a supplied PED file.
*/
trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
/* Optionally parse from ped file */
def ped: Option[File] = config("ped_file", default = None)
/* The merge stategy to use when we have both a ped file and sample tag information */
def mergeStrategy: PedMergeStrategy.Value = PedMergeStrategy.Concatenate
/**
* Case class representing a PED samples
* For the PED format, see:
* http://zzz.bwh.harvard.edu/plink/data.shtml#ped
* @param familyId family id
* @param individualId individual id
* @param paternalId Optional paternal id
* @param maternalId Optional maternal id
* @param gender gender
* @param affectedPhenotype Optional boolean
* @param genotypeFields optional genotype fields
*/
case class PedSample(familyId: String,
individualId: String,
paternalId: Option[String],
maternalId: Option[String],
gender: MultiSampleQScript.Gender.Value,
affectedPhenotype: Option[Boolean],
genotypeFields: List[String] = Nil)
lazy val pedSamples: List[PedSample] = {
mergeStrategy match {
case PedMergeStrategy.Concatenate => pedSamplesFromConfig() ::: parsePedFile()
case _ => throw new NotImplementedError() // todo
}
}
/**
* Get affected samples
*
* @return List[PedSample]
*/
def getIndexSamples: List[PedSample] = {
pedSamples.filter(x => x.affectedPhenotype.contains(true))
}
/**
* Get all samples for which mother and father are defined
* @return
*/
def getChildren: List[PedSample] = {
pedSamples.filter(x => x.maternalId.isDefined && x.paternalId.isDefined)
}
/**
* Get pedSamples from sample tags in config
* May return empty list if no pedigree can be constructed
* PedSamples can only be constructed for those samples where family is defined
* Furthermore, if a father or mother is given, another sample with this id must also exist
*
* @return
*/
def pedSamplesFromConfig(): List[PedSample] = {
val withFam = samples.values.filter(_.family.isDefined)
val sampleIds = withFam.map(_.sampleId).toSet
val fathers = withFam.flatMap(_.father)
val mothers = withFam.flatMap(_.mother)
fathers.foreach { f =>
if (!sampleIds.contains(f)) {
Logging.addError(s"Father $f does not exist in samples")
}
}
mothers.foreach { m =>
if (!sampleIds.contains(m)) {
Logging.addError(s"Mother $m does not exist in samples")
}
}
withFam.map { s =>
PedSample(s.family.get, s.sampleId, s.father, s.mother, s.gender, None)
}.toList
}
/* Parse ped file to list of PedSamples */
def parsePedFile(): List[PedSample] = {
ped match {
case Some(p) =>
Source
.fromFile(p)
.getLines()
.map { x =>
parseSinglePedLine(x)
}
.toList
case _ => Nil
}
}
/* Parse a single Ped line to a PedSample */
def parseSinglePedLine(line: String): PedSample = {
val arr = line.split("\\s")
var genotypeFields: List[String] = Nil
if (arr.size < 6) {
throw new IllegalArgumentException("Ped file contains less than 6 columns")
} else if (arr.length == 6) {
genotypeFields = Nil
} else {
genotypeFields = arr.drop(6).toList
}
val gender = arr(4) match {
case "1" => MultiSampleQScript.Gender.Male
case "2" => MultiSampleQScript.Gender.Female
case _ => MultiSampleQScript.Gender.Unknown
}
val affected = arr(5) match {
case "1" => Some(false)
case "2" => Some(true)
case _ => None
}
val paternalId = arr(2) match {
case "0" => None
case otherwise => Some(otherwise)
}
val maternalId = arr(3) match {
case "0" => None
case otherwise => Some(otherwise)
}
PedSample(arr(0), arr(1), paternalId, maternalId, gender, affected, genotypeFields)
}
/* Check whether sample is a mother */
def isMother(pedSample: PedSample): Boolean = {
val motherIds = pedSamples.flatMap(_.maternalId)
motherIds.contains(pedSample.individualId)
}
/* Check whether sample is a father */
def isFather(pedSample: PedSample): Boolean = {
val fatherIds = pedSamples.flatMap(_.paternalId)
fatherIds.contains(pedSample.individualId)
}
/**
* Convenience method for checking whether current pedigree constitutes a single patient pedigree
*/
def isSingle: Boolean = {
pedSamples.size == 1 && pedSamples.head.maternalId.isEmpty && pedSamples.head.paternalId.isEmpty
}
/**
* Convenience method for checking whether current pedigree constitutes a trio pedigree
*/
def isTrio: Boolean = {
getIndexSamples.size == 1 &&
pedSamples.size == 3 &&
(getIndexSamples.head.maternalId.isDefined && pedSamples
.map(_.individualId)
.contains(getIndexSamples.head.maternalId.get)) &&
(getIndexSamples.head.paternalId.isDefined && pedSamples
.map(_.individualId)
.contains(getIndexSamples.head.paternalId.get))
}
/**
* Write list of ped samples to a PED file
*
* For the PED format see:
* http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml
*/
def writeToPedFile(outFile: File): Unit = {
val writer = new PrintWriter(outFile)
pedSamples.foreach { p =>
val paternalField = p.paternalId match {
case Some(s) => s
case _ => "0"
}
val maternalField = p.maternalId match {
case Some(s) => s
case _ => "0"
}
val genderField = p.gender match {
case MultiSampleQScript.Gender.Male => "1"
case MultiSampleQScript.Gender.Female => "2"
case _ => "-9"
}
val affectedField = p.affectedPhenotype match {
case Some(b) =>
b match {
case true => "2"
case _ => "1"
}
case _ => "0"
}
val mainLine: String =
s"${p.familyId}\t${p.individualId}\t$paternalField\t$maternalField\t$genderField\t$affectedField"
val line = if (p.genotypeFields.nonEmpty) {
mainLine + "\t" + p.genotypeFields.mkString("\t")
} else mainLine
writer.write(line + "\n")
}
writer.close()
}
}
object PedigreeQscript {
object PedMergeStrategy extends Enumeration {
val Concatenate, Update, ConcatenatedAndUpdate = Value
}
}
fam01 sample1 sample2 sample3 2 0
fam01 sample2 0 0 1 0
fam01 sample3 0 0 2 0
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
fam02 sample4 sample5 sample6 2 2
fam02 sample5 0 0 1 1
fam02 sample6 0 0 2 1
package nl.lumc.sasc.biopet.core
import java.io.File
import java.nio.file.Paths
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
import org.broadinstitute.gatk.queue.{QScript, QSettings}
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.language.reflectiveCalls
import scala.collection.mutable.ListBuffer
import scala.io.Source
/**
* Created by Sander Bollen on 11-4-17.
*/
class PedigreeQScriptTest extends TestNGSuite with Matchers {
import PedigreeQScriptTest._
@Test
def testConfigPedigree(): Unit = {
val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: Nil)
script.init()
script.biopetScript()
script.pedSamples.size shouldBe 3
script.pedSamples.map(_.individualId).contains("sample1") shouldBe true
script.pedSamples.map(_.individualId).contains("sample2") shouldBe true
script.pedSamples.map(_.individualId).contains("sample3") shouldBe true
}
@Test
def testGenderCorrect(): Unit = {
val script = PedigreeQScriptTest(sample2 :: Nil)
script.init()
script.biopetScript()
script.pedSamples.size shouldBe 1
script.pedSamples.head.gender shouldEqual Gender.Male
val script2 = PedigreeQScriptTest(sample3 :: Nil)
script2.init()
script2.biopetScript()
script2.pedSamples.size shouldBe 1
script2.pedSamples.head.gender shouldEqual Gender.Female
}
@Test
def testIsSingle(): Unit = {
val script = PedigreeQScriptTest(sample2 :: Nil)
script.init()
script.biopetScript()
script.isSingle shouldBe true
}
@Test
def testPedParsing(): Unit = {
val script = PedigreeQScriptTest(trioPed :: Nil)
script.init()
script.biopetScript()
script.pedSamples.size shouldBe 3
script.pedSamples.map(_.individualId).contains("sample4") shouldBe true
script.pedSamples.map(_.individualId).contains("sample5") shouldBe true
script.pedSamples.map(_.individualId).contains("sample6") shouldBe true
}
@Test
def testIsMother() = {
val script = PedigreeQScriptTest(trioPed :: Nil)
script.init()
script.biopetScript()
script.isMother(script.pedSamples.filter(_.individualId == "sample6").head) shouldBe true
}
@Test
def testIsFather() = {
val script = PedigreeQScriptTest(trioPed :: Nil)
script.init()
script.biopetScript()
script.isFather(script.pedSamples.filter(_.individualId == "sample5").head) shouldBe true
}
@Test
def testIsTrio() = {
val script = PedigreeQScriptTest(trioPed :: Nil)
script.init()
script.biopetScript()
script.isTrio shouldBe true
val script2 = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: Nil)
script2.init()
script2.biopetScript()
script2.isTrio shouldBe false
}
@Test
def testConcatenation() = {
val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: trioPed :: Nil)
script.init()
script.biopetScript()
script.pedSamples.size shouldBe 6
script.pedSamples.map(_.individualId).contains("sample1") shouldBe true
script.pedSamples.map(_.individualId).contains("sample2") shouldBe true
script.pedSamples.map(_.individualId).contains("sample3") shouldBe true
script.pedSamples.map(_.individualId).contains("sample4") shouldBe true
script.pedSamples.map(_.individualId).contains("sample5") shouldBe true
script.pedSamples.map(_.individualId).contains("sample6") shouldBe true
}
@Test
def testWritePedFile(): Unit = {
val script = PedigreeQScriptTest(sample1 :: sample2 :: sample3 :: trioPed :: Nil)
script.init()
script.biopetScript()
val tmpFile = File.createTempFile("test", ".ped")
tmpFile.deleteOnExit()
script.writeToPedFile(tmpFile)
val expectedLines = Source.fromFile(resourcePath("/full.ped")).getLines().toList.sorted
val writtenLines = Source.fromFile(tmpFile).getLines().toList.sorted
writtenLines shouldEqual expectedLines
}
}
object PedigreeQScriptTest {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
}
val sample1 = Map(
"samples" ->
Map(
"sample1" ->
Map(
"tags" ->
Map("gender" -> "female",
"father" -> "sample2",
"mother" -> "sample3",
"family" -> "fam01"))))
val sample2 = Map(
"samples" ->
Map(
"sample2" ->
Map("tags" ->
Map("gender" -> "male", "family" -> "fam01"))))
val sample3 = Map(
"samples" ->
Map(
"sample3" ->
Map("tags" ->
Map("gender" -> "female", "family" -> "fam01"))))
val trioPed = Map("ped_file" -> resourcePath("/trio.ped"))
def apply(configs: List[Map[String, Any]], only: List[String] = Nil) = {
new QScript with PedigreeQscript { qscript =>
qSettings = new QSettings()
qSettings.runName = "test"
override val onlySamples = only
var buffer = new ListBuffer[String]()
override def globalConfig =
new Config(
configs
.foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) })
val parent = null
def getLastLogMessage: String = {
Logging.errors.toList.last.getMessage
}
class Sample(id: String) extends AbstractSample(id) {
class Library(id: String) extends AbstractLibrary(id) {
/** Function that add library jobs */
protected def addJobs(): Unit = {
buffer += config("test")
}
/** Must return files to store into summary */
def summaryFiles: Map[String, File] = Map()
/** Must returns stats to store into summary */
def summaryStats = Map()
}
/**
* Factory method for Library class
* @param id SampleId
* @return Sample class
*/
def makeLibrary(id: String): Library = new Library(id)
/** Function to add sample jobs */
protected def addJobs(): Unit = {
buffer += s"$sampleId"
addPerLibJobs()
add(new Md5sum(qscript))
}
/** Must return files to store into summary */
def summaryFiles: Map[String, File] = Map()
/** Must returns stats to store into summary */
def summaryStats = Map()
}
/**
* Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
*/
def addMultiSampleJobs(): Unit = {
add(new Md5sum(qscript))
}
/**
* Factory method for Sample class
* @param id SampleId
* @return Sample class
*/
def makeSample(id: String): Sample = new Sample(id)
/** Must return a map with used settings for this pipeline */
def summarySettings: Map[String, Any] = Map()
/** File to put in the summary for thie pipeline */
def summaryFiles: Map[String, File] = Map()
/** Name of summary output file */
def summaryFile: File = new File("./summary.json")
/** Init for pipeline */
def init(): Unit = {}
/** Pipeline itself */
def biopetScript(): Unit = {
addSamplesJobs()
addSummaryJobs()
}
}
}
}
......@@ -27,15 +27,23 @@ class BedtoolsMerge(val parent: Configurable) extends Bedtools {
@Input(doc = "Input bed file")
var input: File = _
@Argument(doc = "Distance")
var dist: Option[Int] = config("dist") //default of tool is 1
@Argument(doc = "Distance", required = false)
var dist: Option[Int] = config("dist", default = 1) //default of tool is 1
@Output(doc = "Output bed file")
var output: File = _
@Argument(doc = "operation to additional columns", required = false)
var operation: Option[String] = None
@Argument(doc = "Additional columns to operate upon", required = false)
var additionalColumns: List[Int] = Nil
def cmdLine = {
required(executable) + required("merge") +
required("-i", input) + optional("-d", dist) +
(if (additionalColumns.nonEmpty) required("-c", additionalColumns.mkString(",")) else "") +
optional("-o", operation) +
" > " + required(output)
}
......
package nl.lumc.sasc.biopet.extensions.stouffbed
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import org.broadinstitute.gatk.utils.commandline.Input
/**
* Created by Sander Bollen on 24-4-17.
*/
abstract class Stouffbed extends BiopetCommandLineFunction with Version {
executable = config("exe", namespace = "stouffbed", default = "stouffbed")
@Input
var inputFiles: List[File] = Nil
def versionCommand = executable + " --version"
def versionRegex = """.+, version (.*)""".r
}
package nl.lumc.sasc.biopet.extensions.stouffbed
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
/**
* Created by Sander Bollen on 24-4-17.
*/
class StouffbedHorizontal(val parent: Configurable) extends Stouffbed {
@Output
var output: File = _
def cmdLine: String = {
executable +
required("horizontal") +
repeat("-i", inputs) +
required("-o", output)
}
}
package nl.lumc.sasc.biopet.extensions.stouffbed
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
/**
* Created by Sander Bollen on 24-4-17.
*/
class StouffbedVertical(val parent: Configurable) extends Stouffbed {
@Output
var output: File = _
var windowSize: Int = _
def cmdLine: String = {
executable +
required("vertical") +
repeat("-i", inputs) +
required("-o", output) +
required("-w", windowSize)
}
}
package nl.lumc.sasc.biopet.extensions.wisecondor
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}