Skip to content
Snippets Groups Projects
Commit 14300afa authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Add --id and --id-file option

parent cda615ea
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._
import scala.io.Source
class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
......@@ -69,7 +70,8 @@ object VcfFilter extends ToolCommand {
diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false,
filterNoCalls: Boolean = false) extends AbstractArgs
filterNoCalls: Boolean = false,
iDset: Set[String] = Set()) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
......@@ -116,6 +118,12 @@ object VcfFilter extends ToolCommand {
opt[Double]("minQualscore") unbounded () action { (x, c) =>
c.copy(minQualscore = Some(x))
} text ("Min qual score")
opt[String]("id") unbounded () action { (x, c) =>
c.copy(iDset = c.iDset + x)
} text ("Id that may pass the filter")
opt[File]("id-file") unbounded () action { (x, c) =>
c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
} text ("File that contain list of IDs to get from vcf file")
}
var commandArgs: Args = _
......@@ -124,6 +132,7 @@ object VcfFilter extends ToolCommand {
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
logger.info("Start")
val argsParser = new OptParser
commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
......@@ -132,6 +141,8 @@ object VcfFilter extends ToolCommand {
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
writer.writeHeader(header)
var counterTotal = 0
var counterLeft = 0
for (record <- reader) {
if (minQualscore(record) &&
filterRefCalls(record) &&
......@@ -143,12 +154,18 @@ object VcfFilter extends ToolCommand {
mustHaveVariant(record) &&
notSameGenotype(record) &&
filterHetVarToHomVar(record) &&
denovoInSample(record)) {
denovoInSample(record) &&
inIdSet(record)) {
writer.add(record)
counterLeft += 1
}
counterTotal += 1
if (counterTotal % 100000 == 0) logger.info(counterTotal + " variants processed, " + counterLeft + " left")
}
logger.info(counterTotal + " variants processed, " + counterLeft + " left")
reader.close
writer.close
logger.info("Done")
}
def minQualscore(record: VariantContext): Boolean = {
......@@ -241,4 +258,9 @@ object VcfFilter extends ToolCommand {
}
return true
}
def inIdSet(record: VariantContext): Boolean = {
if (commandArgs.iDset.isEmpty) true
else record.getID.split(",").exists(commandArgs.iDset.contains(_))
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment