Commit 0499783f authored by Sander Bollen's avatar Sander Bollen
Browse files

VcfWithVCf refactor

parent 20eb2f37
......@@ -16,8 +16,9 @@
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.util
import htsjdk.variant.variantcontext.VariantContextBuilder
import htsjdk.variant.variantcontext.{VariantContext, VariantContextBuilder}
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, ToolCommand }
......@@ -25,6 +26,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
/**
* Biopet extension for tool VcfWithVcf
......@@ -140,44 +142,11 @@ object VcfWithVcf extends ToolCommand {
var counter = 0
for (record <- reader) {
val secondaryRecords = if (commandArgs.matchAllele) {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList.
filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele))
} else {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList
}
val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele)
val fieldMap = (for (
f <- commandArgs.fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
secondRecord.getAttribute(f.inputField) match {
case l: List[_] => l
case x => List(x)
}
}).fold(Nil)(_ ::: _)
}).toMap
val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords)
writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
builder.attribute(attribute._1, commandArgs.fields.filter(_.outputField == attribute._1).head.fieldMethod match {
case FieldMethod.max =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).max)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).max)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.min =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).min)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).min)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.unique => attribute._2.distinct.toArray
case _ => attribute._2.toArray
})
}).make())
writer.add(createRecord(fieldMap, record, commandArgs.fields, header))
counter += 1
if (counter % 100000 == 0) {
......@@ -192,4 +161,88 @@ object VcfWithVcf extends ToolCommand {
secondaryReader.close()
logger.info("Done")
}
/**
* Create Map of field -> List of attributes in secondary records
* @param fields List of Field
* @param secondaryRecords List of VariantContext with secondary records
* @return Map of fields and their values in secondary records
*/
def createFieldMap(fields: List[Fields], secondaryRecords: List[VariantContext]) : Map[String, List[Any]] = {
val fieldMap = (for (
f <- fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
secondRecord.getAttribute(f.inputField) match {
case l: List[_] => l
case x => List(x)
}
}).fold(Nil)(_ ::: _)
}).toMap
fieldMap
}
/**
* Get secondary records matching the query record
* @param secondaryReader reader for secondary records
* @param record query record
* @param matchAllele allele has to match query allele?
* @return List of VariantContext
*/
def getSecondaryRecords(secondaryReader: VCFFileReader,
record: VariantContext, matchAllele: Boolean) : List[VariantContext] = {
if (matchAllele) {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList.
filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele))
} else {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList
}
}
def createRecord(fieldMap: Map[String, List[Any]], record: VariantContext,
fields: List[Fields], header: VCFHeader): VariantContext = {
fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
builder.attribute(attribute._1, fields.filter(_.outputField == attribute._1).head.fieldMethod match {
case FieldMethod.max =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => sL2JOAL(List(attribute._2.map(_.toString.toInt).max))
case VCFHeaderLineType.Float => sL2JOAL(List(attribute._2.map(_.toString.toFloat).max))
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.min =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => sL2JOAL(List(attribute._2.map(_.toString.toInt).min))
case VCFHeaderLineType.Float => sL2JOAL(List(attribute._2.map(_.toString.toFloat).min))
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.unique => sL2JOAL(attribute._2.distinct)
case _ => sL2JOAL(attribute._2)
})
}).make()
}
/**
* HACK!!
* Stands for scalaListToJavaObjectArrayList
* Convert a scala List[Any] to a java ArrayList[Object]. This is necessary for BCF conversions
* As scala ints and floats cannot be directly cast to java objects (they aren't objects),
* we need to box them.
* For items not int and float, we assume them to be strings (TODO: sane assumption?)
* @param array scala List[Any]
* @return converted java ArrayList[Object]
*/
private def sL2JOAL(array: List[Any]): util.ArrayList[Object] = {
val out = new util.ArrayList[Object]()
array.foreach {
case x: Int => out.add(Int.box(x))
case x: Float => out.add(Float.box(x))
case x => out.add(x.toString)
}
out
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment