Skip to content
Snippets Groups Projects
Commit 4bf8722f authored by bow's avatar bow
Browse files

Merge branch 'feature-add_ids_to_filter' into 'develop'

Feature add ids to filter

Added options to vcf filter tool for @s.a.j.van_der_zeeuw and @j.f.j.laros

See merge request !88
parents 3befe631 2757c868
No related branches found
No related tags found
No related merge requests found
...@@ -26,6 +26,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand ...@@ -26,6 +26,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input } import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.io.Source
class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName javaMainClass = getClass.getName
...@@ -58,6 +59,7 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { ...@@ -58,6 +59,7 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
object VcfFilter extends ToolCommand { object VcfFilter extends ToolCommand {
case class Args(inputVcf: File = null, case class Args(inputVcf: File = null,
outputVcf: File = null, outputVcf: File = null,
invertedOutputVcf: Option[File] = None,
minQualscore: Option[Double] = None, minQualscore: Option[Double] = None,
minSampleDepth: Int = -1, minSampleDepth: Int = -1,
minTotalDepth: Int = -1, minTotalDepth: Int = -1,
...@@ -69,7 +71,8 @@ object VcfFilter extends ToolCommand { ...@@ -69,7 +71,8 @@ object VcfFilter extends ToolCommand {
diffGenotype: List[(String, String)] = Nil, diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil, filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false, filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false) extends AbstractArgs filterNoCalls: Boolean = false,
iDset: Set[String] = Set()) extends AbstractArgs
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
...@@ -78,6 +81,9 @@ object VcfFilter extends ToolCommand { ...@@ -78,6 +81,9 @@ object VcfFilter extends ToolCommand {
opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(outputVcf = x) c.copy(outputVcf = x)
} text ("Output vcf file") } text ("Output vcf file")
opt[File]("invertedOutputVcf") maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(invertedOutputVcf = Some(x))
} text ("inverted output vcf file")
opt[Int]("minSampleDepth") unbounded () valueName ("<int>") action { (x, c) => opt[Int]("minSampleDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minSampleDepth = x) c.copy(minSampleDepth = x)
} text ("Min value for DP in genotype fields") } text ("Min value for DP in genotype fields")
...@@ -116,6 +122,12 @@ object VcfFilter extends ToolCommand { ...@@ -116,6 +122,12 @@ object VcfFilter extends ToolCommand {
opt[Double]("minQualscore") unbounded () action { (x, c) => opt[Double]("minQualscore") unbounded () action { (x, c) =>
c.copy(minQualscore = Some(x)) c.copy(minQualscore = Some(x))
} text ("Min qual score") } text ("Min qual score")
opt[String]("id") unbounded () action { (x, c) =>
c.copy(iDset = c.iDset + x)
} text ("Id that may pass the filter")
opt[File]("id-file") unbounded () action { (x, c) =>
c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
} text ("File that contain list of IDs to get from vcf file")
} }
var commandArgs: Args = _ var commandArgs: Args = _
...@@ -124,6 +136,7 @@ object VcfFilter extends ToolCommand { ...@@ -124,6 +136,7 @@ object VcfFilter extends ToolCommand {
* @param args the command line arguments * @param args the command line arguments
*/ */
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
logger.info("Start")
val argsParser = new OptParser val argsParser = new OptParser
commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
...@@ -132,6 +145,11 @@ object VcfFilter extends ToolCommand { ...@@ -132,6 +145,11 @@ object VcfFilter extends ToolCommand {
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build) val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
writer.writeHeader(header) writer.writeHeader(header)
val invertedWriter = commandArgs.invertedOutputVcf.collect { case x => new VariantContextWriterBuilder().setOutputFile(x).build }
invertedWriter.foreach(_.writeHeader(header))
var counterTotal = 0
var counterLeft = 0
for (record <- reader) { for (record <- reader) {
if (minQualscore(record) && if (minQualscore(record) &&
filterRefCalls(record) && filterRefCalls(record) &&
...@@ -143,12 +161,20 @@ object VcfFilter extends ToolCommand { ...@@ -143,12 +161,20 @@ object VcfFilter extends ToolCommand {
mustHaveVariant(record) && mustHaveVariant(record) &&
notSameGenotype(record) && notSameGenotype(record) &&
filterHetVarToHomVar(record) && filterHetVarToHomVar(record) &&
denovoInSample(record)) { denovoInSample(record) &&
inIdSet(record)) {
writer.add(record) writer.add(record)
} counterLeft += 1
} else
invertedWriter.foreach(_.add(record))
counterTotal += 1
if (counterTotal % 100000 == 0) logger.info(counterTotal + " variants processed, " + counterLeft + " left")
} }
logger.info(counterTotal + " variants processed, " + counterLeft + " left")
reader.close reader.close
writer.close writer.close
invertedWriter.foreach(_.close())
logger.info("Done")
} }
def minQualscore(record: VariantContext): Boolean = { def minQualscore(record: VariantContext): Boolean = {
...@@ -241,4 +267,9 @@ object VcfFilter extends ToolCommand { ...@@ -241,4 +267,9 @@ object VcfFilter extends ToolCommand {
} }
return true return true
} }
/**
 * Checks whether a record passes the ID whitelist collected from the "id" and "id-file" options.
 *
 * @param record variant record to test
 * @return true when no IDs were requested (the filter is then disabled), or when at least
 *         one of the identifiers of the record is in the requested set
 */
def inIdSet(record: VariantContext): Boolean = {
  val wanted = commandArgs.iDset
  // The VCF ID column separates multiple identifiers with ';'. Splitting on ',' is kept as
  // well, so every record that matched before this change still matches.
  wanted.isEmpty || record.getID.split("[;,]").exists(wanted.contains)
}
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment