Commit 97eb9752 authored by Wai Yi Leung's avatar Wai Yi Leung

Merge branch 'feature-vcfanotate' into 'develop'

Feature vcfanotate

Adding a feature for project

See merge request !167
parents f262d102 be33324b
......@@ -3,18 +3,16 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import scala.collection.JavaConversions._
import htsjdk.variant.variantcontext.{ VariantContextBuilder, VariantContext }
import htsjdk.variant.variantcontext.VariantContextBuilder
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.ToolCommand
import scala.collection.immutable
/**
* Created by ahbbollen on 11-2-15.
*/
object VcfWithVcf extends ToolCommand {
case class Fields(inputField: String, outputField: String)
case class Fields(inputField: String, outputField: String, fieldMethod: FieldMethod.Value = FieldMethod.none)
case class Args(inputFile: File = null,
outputFile: File = null,
......@@ -22,6 +20,10 @@ object VcfWithVcf extends ToolCommand {
fields: List[Fields] = Nil,
matchAllele: Boolean = true) extends AbstractArgs
object FieldMethod extends Enumeration {
val none, max, min, unique = Value
}
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(inputFile = x)
......@@ -29,20 +31,29 @@ object VcfWithVcf extends ToolCommand {
opt[File]('O', "outputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(outputFile = x)
}
opt[File]('S', "SecondaryVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
opt[File]('S', "secondaryVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
c.copy(secondaryVcf = x)
}
opt[String]('f', "field") unbounded () valueName ("<input_field:output_field>") action { (x, c) =>
opt[String]('f', "field") unbounded () valueName ("<field> or <input_field:output_field> or <input_field:output_field:method>") action { (x, c) =>
val values = x.split(":")
if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields)
if (values.size > 2) c.copy(fields = Fields(values(0), values(1), FieldMethod.withName(values(2))) :: c.fields)
else if (values.size > 1) c.copy(fields = Fields(values(0), values(1)) :: c.fields)
else c.copy(fields = Fields(x, x) :: c.fields)
}
} text ("""| If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
| By default we will return all values found for a given field.
| With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
| - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minemal of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin
)
opt[Boolean]("match") valueName ("<Boolean>") maxOccurs (1) action { (x, c) =>
c.copy(matchAllele = x)
} text ("Match alternative alleles; default true")
}
def main(args: Array[String]): Unit = {
logger.info("Init phase")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
......@@ -55,7 +66,6 @@ object VcfWithVcf extends ToolCommand {
setOutputFile(commandArgs.outputFile).
setReferenceDictionary(header.getSequenceDictionary).
build)
writer.writeHeader(header)
for (x <- commandArgs.fields) {
if (header.hasInfoLine(x.outputField))
......@@ -69,9 +79,11 @@ object VcfWithVcf extends ToolCommand {
oldHeaderLine.getType, oldHeaderLine.getDescription)
header.addMetaDataLine(newHeaderLine)
}
writer.writeHeader(header)
var idx = 0
logger.info("Start reading records")
var counter = 0
for (record <- reader) {
val secondaryRecords = if (commandArgs.matchAllele) {
secondaryReader.query(record.getChr, record.getStart, record.getEnd).toList.
......@@ -96,21 +108,37 @@ object VcfWithVcf extends ToolCommand {
}).toMap
writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
builder.attribute(attribute._1, attribute._2.toArray)
builder.attribute(attribute._1, commandArgs.fields.filter(_.outputField == attribute._1).head.fieldMethod match {
case FieldMethod.max => {
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).max)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).max)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
}
case FieldMethod.min => {
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).min)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).min)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
}
case FieldMethod.unique => attribute._2.distinct.toArray
case _ => attribute._2.toArray
})
}).make())
idx += 1
if (idx % 100000 == 0) {
logger.info(s"""Processed $idx records""")
counter += 1
if (counter % 100000 == 0) {
logger.info(s"""Processed $counter records""")
}
}
logger.info(s"""Processed $idx records""")
logger.info(s"""Processed $counter records""")
logger.debug("Closing readers")
writer.close()
reader.close()
secondaryReader.close()
logger.info("Done. Goodbye")
logger.info("Done")
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment