Skip to content
Snippets Groups Projects
Commit 0499783f authored by Sander Bollen's avatar Sander Bollen
Browse files

VcfWithVCf refactor

parent 20eb2f37
No related branches found
No related tags found
No related merge requests found
......@@ -16,8 +16,9 @@
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.util
import htsjdk.variant.variantcontext.VariantContextBuilder
import htsjdk.variant.variantcontext.{VariantContext, VariantContextBuilder}
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.core.{ ToolCommandFuntion, ToolCommand }
......@@ -25,6 +26,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
/**
* Biopet extension for tool VcfWithVcf
......@@ -140,44 +142,11 @@ object VcfWithVcf extends ToolCommand {
var counter = 0
for (record <- reader) {
val secondaryRecords = if (commandArgs.matchAllele) {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList.
filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele))
} else {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList
}
val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele)
val fieldMap = (for (
f <- commandArgs.fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
secondRecord.getAttribute(f.inputField) match {
case l: List[_] => l
case x => List(x)
}
}).fold(Nil)(_ ::: _)
}).toMap
val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords)
writer.add(fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
builder.attribute(attribute._1, commandArgs.fields.filter(_.outputField == attribute._1).head.fieldMethod match {
case FieldMethod.max =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).max)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).max)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.min =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => Array(attribute._2.map(_.toString.toInt).min)
case VCFHeaderLineType.Float => Array(attribute._2.map(_.toString.toFloat).min)
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.unique => attribute._2.distinct.toArray
case _ => attribute._2.toArray
})
}).make())
writer.add(createRecord(fieldMap, record, commandArgs.fields, header))
counter += 1
if (counter % 100000 == 0) {
......@@ -192,4 +161,88 @@ object VcfWithVcf extends ToolCommand {
secondaryReader.close()
logger.info("Done")
}
/**
* Create Map of field -> List of attributes in secondary records
* @param fields List of Field
* @param secondaryRecords List of VariantContext with secondary records
* @return Map of fields and their values in secondary records
*/
def createFieldMap(fields: List[Fields], secondaryRecords: List[VariantContext]) : Map[String, List[Any]] = {
val fieldMap = (for (
f <- fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
secondRecord.getAttribute(f.inputField) match {
case l: List[_] => l
case x => List(x)
}
}).fold(Nil)(_ ::: _)
}).toMap
fieldMap
}
/**
* Get secondary records matching the query record
* @param secondaryReader reader for secondary records
* @param record query record
* @param matchAllele allele has to match query allele?
* @return List of VariantContext
*/
def getSecondaryRecords(secondaryReader: VCFFileReader,
record: VariantContext, matchAllele: Boolean) : List[VariantContext] = {
if (matchAllele) {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList.
filter(x => record.getAlternateAlleles.exists(x.hasAlternateAllele))
} else {
secondaryReader.query(record.getContig, record.getStart, record.getEnd).toList
}
}
def createRecord(fieldMap: Map[String, List[Any]], record: VariantContext,
fields: List[Fields], header: VCFHeader): VariantContext = {
fieldMap.foldLeft(new VariantContextBuilder(record))((builder, attribute) => {
builder.attribute(attribute._1, fields.filter(_.outputField == attribute._1).head.fieldMethod match {
case FieldMethod.max =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => sL2JOAL(List(attribute._2.map(_.toString.toInt).max))
case VCFHeaderLineType.Float => sL2JOAL(List(attribute._2.map(_.toString.toFloat).max))
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.min =>
header.getInfoHeaderLine(attribute._1).getType match {
case VCFHeaderLineType.Integer => sL2JOAL(List(attribute._2.map(_.toString.toInt).min))
case VCFHeaderLineType.Float => sL2JOAL(List(attribute._2.map(_.toString.toFloat).min))
case _ => throw new IllegalArgumentException("Type of field " + attribute._1 + " is not numeric")
}
case FieldMethod.unique => sL2JOAL(attribute._2.distinct)
case _ => sL2JOAL(attribute._2)
})
}).make()
}
/**
* HACK!!
* Stands for scalaListToJavaObjectArrayList
* Convert a scala List[Any] to a java ArrayList[Object]. This is necessary for BCF conversions
* As scala ints and floats cannot be directly cast to java objects (they aren't objects),
* we need to box them.
* For items not int and float, we assume them to be strings (TODO: sane assumption?)
* @param array scala List[Any]
* @return converted java ArrayList[Object]
*/
private def sL2JOAL(array: List[Any]): util.ArrayList[Object] = {
val out = new util.ArrayList[Object]()
array.foreach {
case x: Int => out.add(Int.box(x))
case x: Float => out.add(Float.box(x))
case x => out.add(x.toString)
}
out
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment