Commit d5c0c6fa authored by Sander Bollen's avatar Sander Bollen

thresholding

parent ba7d90fb
......@@ -27,15 +27,23 @@ class BedtoolsMerge(val parent: Configurable) extends Bedtools {
@Input(doc = "Input bed file")
var input: File = _
@Argument(doc = "Distance")
var dist: Option[Int] = config("dist") //default of tool is 1
@Argument(doc = "Distance", required = false)
var dist: Option[Int] = config("dist", default = 1) //default of tool is 1
@Output(doc = "Output bed file")
var output: File = _
@Argument(doc = "operation to additional columns", required = false)
var operation: Option[String] = None
@Argument(doc = "Additional columns to operate upon", required = false)
var additionalColumns: Option[List[Int]] = None
def cmdLine = {
required(executable) + required("merge") +
required("-i", input) + optional("-d", dist) +
(if (additionalColumns.isDefined) required("-c", additionalColumns.get.mkString(",")) else "") +
optional("-o", operation) +
" > " + required(output)
}
......
#!/usr/bin/env python3
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster
# which is running SGE. But other types of HPC that are supported by
# GATK Queue (such as PBS) should also be able to execute Biopet tools and
# pipelines.
#
# Copyright 2014-2017 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project is
# freely available for non-commercial use under an AGPL license;
# For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
import argparse
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-i', "--input", required=True)
parser.add_argument("-t", "--threshold", type=int, default=5)
args = parser.parse_args()
with open(args.input) as handle:
for line in handle:
value = float(line.strip().split("\t")[-1])
if abs(value) >= args.threshold:
print(line)
\ No newline at end of file
......@@ -4,13 +4,13 @@ import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, PedigreeQscript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsSort }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge, BedtoolsSort }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Gzip, Ln, Tabix }
import nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
import nl.lumc.sasc.biopet.extensions.stouffbed.{ StouffbedHorizontal, StouffbedVertical }
import nl.lumc.sasc.biopet.extensions.wisecondor.{ WisecondorCount, WisecondorGcCorrect, WisecondorNewRef, WisecondorZscore }
import nl.lumc.sasc.biopet.extensions.xhmm.{ XhmmMatrix, XhmmMergeGatkDepths, XhmmNormalize, XhmmPca }
import nl.lumc.sasc.biopet.pipelines.tarmac.scripts.SampleFromMatrix
import nl.lumc.sasc.biopet.pipelines.tarmac.scripts.{ BedThreshold, SampleFromMatrix }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -26,9 +26,13 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
lazy val targets: File = config("targets")
lazy val stouffWindowSizes: List[Int] = config("stouff_window_size")
lazy val threshold: Int = config("threshold")
def this() = this(null)
private var _finalFiles: Map[Sample, List[File]] = Map()
def finalFiles: Map[Sample, List[File]] = _finalFiles
/* Fixed values for xhmm count file */
override def fixedValues: Map[String, Any] = {
super.fixedValues ++ Map(
......@@ -137,6 +141,31 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
sample -> subMap.toMap
}
val thresholdJobs = windowStouffJobs map {
case (sample, subMap) =>
val threshSubMap = subMap map {
case (size, job) =>
val windowDir = new File(sample.sampleDir, s"window_$size")
val thresholder = new BedThreshold(this)
thresholder.input = job.output
thresholder.isIntermediate = true
thresholder.output = Some(new File(windowDir, s"${sample.sampleId}.threshold-raw.bed"))
thresholder.threshold = threshold
val merger = new BedtoolsMerge(this)
merger.input = thresholder.output.get
merger.output = new File(windowDir, s"${sample.sampleId}.threshold.bed")
merger.additionalColumns = Some(List(4))
merger.operation = Some("median")
size -> List(thresholder, merger)
}
sample -> threshSubMap
}
_finalFiles = thresholdJobs map {
case (sample, subMap) =>
sample -> subMap.values.flatten.collect { case j: BedtoolsMerge => j.output }.toList
}
addAll(xhmmRefJobs.values.flatMap(_._1))
addAll(wisecondorRefJobs.values.flatMap(_._1))
addAll(xhmmZJobs.values.flatMap(_._1))
......@@ -145,6 +174,7 @@ class Tarmac(val parent: Configurable) extends QScript with PedigreeQscript with
addAll(xhmmSyncJobs.values)
addAll(zScoreMergeJobs.values)
addAll(windowStouffJobs.values.flatMap(_.values))
addAll(thresholdJobs.values.flatMap(_.values).flatten)
}
/**
......
package nl.lumc.sasc.biopet.pipelines.tarmac.scripts
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Created by Sander Bollen on 24-4-17.
*/
class BedThreshold(val parent: Configurable) extends PythonCommandLineFunction {
setPythonScript("bed_threshold.py")
@Input
var input: File = _
@Argument
var threshold: Int = _
@Output(required = false)
var output: Option[File] = None
def cmdLine: String = {
getPythonCommand +
required("-i", input) +
required("-t", threshold) +
(if (outputAsStsout) "" else " > " + required(output))
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment