Commit 2b6ed881 authored by Sander Bollen's avatar Sander Bollen
Browse files

add recessive calls

parent 3c1ebbd6
......@@ -61,6 +61,14 @@ trait PedigreeQscript extends MultiSampleQScript { qscript: QScript =>
pedSamples.filter(x => x.affectedPhenotype.contains(true))
}
/**
* Get all samples for which mother and father are defined
* @return
*/
def getChildren: List[PedSample] = {
pedSamples.filter(x => x.maternalId.isDefined && x.paternalId.isDefined)
}
/**
* Get pedSamples from sample tags in config
* May return empty list if no pedigree can be constructed
......
#!/usr/bin/env python3
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster
# which is running SGE. But other types of HPC that are supported by
# GATK Queue (such as PBS) should also be able to execute Biopet tools and
# pipelines.
#
# Copyright 2014-2017 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project is
# freely available for non-commercial use under an AGPL license;
# For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
import argparse
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--input")
parser.add_argument("--db", action="append", default=[])
args = parser.parse_args()
dbs = []
for x in args.db:
d = {}
with open(x) as db_handle:
for line in db_handle:
reg = "\t".join(line.split("\t")[:3])
d[reg] = True
dbs.append(d)
with open(args.input) as inhandle:
for line in inhandle:
reg = "\t".join(line.split("\t")[:3])
if all([reg in x for x in dbs]):
print(line.strip())
......@@ -18,7 +18,12 @@ import nl.lumc.sasc.biopet.extensions.xhmm.{
XhmmPca
}
import nl.lumc.sasc.biopet.extensions.{Bgzip, Ln, Tabix}
import nl.lumc.sasc.biopet.pipelines.tarmac.scripts.{BedThreshold, SampleFromMatrix, TarmacPlot}
import nl.lumc.sasc.biopet.pipelines.tarmac.scripts.{
BedThreshold,
FindAllCommon,
SampleFromMatrix,
TarmacPlot
}
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -140,6 +145,41 @@ class Tarmac(val parent: Configurable)
sample -> horizontal
}
val recessiveJobs = zScoreMergeJobs
.filter(x => getChildren.map(_.individualId).contains(x._1.sampleId))
.flatMap {
case (sample, job) =>
val parents = findParentHorizontals(sample, zScoreMergeJobs)
val parentsAndKid = job :: parents
val syncJobs = parentsAndKid.map { x =>
val syncer = new FindAllCommon(this)
val remainder = (parentsAndKid.toSet - x).toList
syncer.inputFile = x.output
syncer.databases = remainder.map(_.output)
syncer.output =
Some(swapExt(x.output.getParentFile, x.output, ".bed", ".recessive.bed"))
syncer
}
val verticalsAndThresholds = syncJobs map { s =>
stouffWindowSizes map { size =>
val windowDir = new File(sample.sampleDir, s"window_$size")
val vertical = new StouffbedVertical(this)
vertical.inputFiles = List(s.output.get)
vertical.output =
new File(windowDir, s"${sample.sampleId}.window_$size.recessive.z.bed")
val threshold = new BedThreshold(this)
threshold.input = vertical.output
threshold.threshold = size
threshold.output =
Some(new File(windowDir, s"${sample.sampleId}.recessive.treshold.bed"))
vertical :: threshold :: Nil
}
}
val verticalAndThresholdJobs: List[BiopetCommandLineFunction] =
verticalsAndThresholds.flatten.flatten
verticalAndThresholdJobs ::: syncJobs
}
val windowStouffJobs = zScoreMergeJobs map {
case (sample, horizontal) =>
val subMap = stouffWindowSizes map { size =>
......@@ -181,6 +221,7 @@ class Tarmac(val parent: Configurable)
addAll(zScoreMergeJobs.values)
addAll(windowStouffJobs.values.flatMap(_.values))
addAll(thresholdJobs.values.flatMap(_.values))
addAll(recessiveJobs)
val xhmmZGzip = xhmmZJobs map {
case (sample, jobAndRefFile) =>
......@@ -233,6 +274,20 @@ class Tarmac(val parent: Configurable)
}
}
def findParentHorizontals(
sample: Sample,
sampleHorizontalMap: Map[Sample, StouffbedHorizontal]): List[StouffbedHorizontal] = {
val sampleIdMap = sampleHorizontalMap.map { case (k, v) => k.sampleId -> v }
(sample.mother, sample.father) match {
case (Some(m), Some(f)) =>
(sampleIdMap.get(m), sampleIdMap.get(f)) match {
case (Some(one), Some(two)) => List(one, two)
case _ => Nil
}
case _ => Nil
}
}
case class SortBgzipTabix(sortJob: BedtoolsSort, bgzipJob: Bgzip, tabixJob: Tabix)
def sortBgzipAndTabix(file: File): SortBgzipTabix = {
......
package nl.lumc.sasc.biopet.pipelines.tarmac.scripts
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
* Created by Sander Bollen on 23-6-17.
*/
class FindAllCommon(val parent: Configurable) extends PythonCommandLineFunction {
setPythonScript("find_all_common.py")
@Input
var inputFile: File = _
@Input
var databases: List[File] = Nil
@Output(required = false)
var output: Option[File] = None
def cmdLine: String = {
getPythonCommand +
required("--input", inputFile) +
repeat("--db", databases) +
(if (outputAsStdout) "" else " > " + required(output))
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment