Skip to content
Snippets Groups Projects
Commit 7a797196 authored by bow's avatar bow
Browse files

Merge branch 'fix-vep_normalizer' into 'release-0.3.0'

Fix VEP normalizer

Fix the output of the normalizer

See merge request !129
parents 9bb14daa da93f0ca
No related branches found
No related tags found
No related merge requests found
...@@ -126,13 +126,17 @@ object VEPNormalizer extends ToolCommand { ...@@ -126,13 +126,17 @@ object VEPNormalizer extends ToolCommand {
logger.info(s"""You have selected mode $mode""") logger.info(s"""You have selected mode $mode""")
logger.info("Start processing records") logger.info("Start processing records")
var counter = 0
for (record <- reader) { for (record <- reader) {
mode match { mode match {
case "explode" => explodeTranscripts(record, newInfos, removeCsq).foreach(vc => writer.add(vc)) case "explode" => explodeTranscripts(record, newInfos, removeCsq).foreach(vc => writer.add(vc))
case "standard" => writer.add(standardTranscripts(record, newInfos, removeCsq)) case "standard" => writer.add(standardTranscripts(record, newInfos, removeCsq))
case _ => throw new IllegalArgumentException("Something odd happened!") case _ => throw new IllegalArgumentException("Something odd happened!")
} }
counter += 1
if (counter % 100000 == 0) logger.info(counter + " variants processed")
} }
logger.info("done: " + counter + " variants processed")
} }
/** /**
...@@ -180,44 +184,45 @@ object VEPNormalizer extends ToolCommand { ...@@ -180,44 +184,45 @@ object VEPNormalizer extends ToolCommand {
* Explode a single VEP-annotated record to multiple normal records * Explode a single VEP-annotated record to multiple normal records
* Based on the number of annotated transcripts in the CSQ tag * Based on the number of annotated transcripts in the CSQ tag
* @param record the record as a VariantContext object * @param record the record as a VariantContext object
* @param csq_infos An array with names of new info tags * @param csqInfos An array with names of new info tags
* @return An array with the new records * @return An array with the new records
*/ */
def explodeTranscripts(record: VariantContext, csq_infos: Array[String], remove_CSQ: Boolean): Array[VariantContext] = { def explodeTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): Array[VariantContext] = {
val csq = record.getAttributeAsString("CSQ", "unknown") for (transcript <- parseCsq(record)) yield {
val attributes = if (remove_CSQ) record.getAttributes.toMap - "CSQ" else record.getAttributes.toMap (for (
fieldId <- 0 until csqInfos.size if transcript.isDefinedAt(fieldId);
csq. value = transcript(fieldId) if value.nonEmpty
stripPrefix("["). ) yield csqInfos(fieldId) -> value)
stripSuffix("]"). .filterNot(_._2.isEmpty)
split(","). .foldLeft(createBuilder(record, removeCsq))((builder, attribute) => builder.attribute(attribute._1, attribute._2))
map(x => attributes ++ csq_infos.zip(x.split("""\|""", -1))). .make()
map(x => { }
if (remove_CSQ) new VariantContextBuilder(record)
.attributes(x)
.make()
else new VariantContextBuilder(record).attributes(x).make()
})
} }
def standardTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): VariantContext = { def standardTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): VariantContext = {
val csq = record.getAttributeAsString("CSQ", "unknown") val attribs = parseCsq(record)
val attributes = if (removeCsq) record.getAttributes.toMap - "CSQ" else record.getAttributes.toMap
(for (fieldId <- 0 until csqInfos.size) yield csqInfos(fieldId) -> {
for (
transcript <- attribs if transcript.isDefinedAt(fieldId);
value = transcript(fieldId) if value.nonEmpty
) yield value
})
.filter(_._2.nonEmpty)
.foldLeft(createBuilder(record, removeCsq))((builder, attribute) => builder.attribute(attribute._1, attribute._2))
.make()
}
/**
 * Create a VariantContextBuilder seeded from an existing record
 * @param record the record as a VariantContext object
 * @param removeCsq when true, the "CSQ" attribute is removed from the builder
 * @return a builder based on the record, with or without the "CSQ" attribute
 */
protected def createBuilder(record: VariantContext, removeCsq: Boolean) = {
  val builder = new VariantContextBuilder(record)
  // rmAttribute hands back the builder itself, so both branches yield the same type
  if (removeCsq) builder.rmAttribute("CSQ") else builder
}
val newAttrs = attributes ++ csqInfos.zip(csq. protected def parseCsq(record: VariantContext) = {
record.getAttributeAsString("CSQ", "unknown").
stripPrefix("["). stripPrefix("[").
stripSuffix("]"). stripSuffix("]").
split(","). split(",").map(_.split("""\|""").map(_.trim))
// This makes a list of lists with each annotation for every transcript in a top-level list element
foldLeft(List.fill(csqInfos.length) { List.empty[String] })(
(acc, x) => {
val broken = x.split("""\|""", -1)
acc.zip(broken).map(x => x._2 :: x._1)
}
).
map(x => x.mkString(",")))
new VariantContextBuilder(record).attributes(newAttrs).make()
} }
case class Args(inputVCF: File = null, case class Args(inputVCF: File = null,
......
...@@ -71,7 +71,10 @@ class VEPNormalizerTest extends TestNGSuite with MockitoSugar with Matchers { ...@@ -71,7 +71,10 @@ class VEPNormalizerTest extends TestNGSuite with MockitoSugar with Matchers {
} }
def check(item: String) = { def check(item: String) = {
record.getAttribute(item).toString.split(""",""", -1).length should be(11) record.getAttribute(item) match {
case l: List[_] => l.length should be(11)
case _ =>
}
} }
val items = Array("AA_MAF", "AFR_MAF", "ALLELE_NUM", "AMR_MAF", "ASN_MAF", "Allele", val items = Array("AA_MAF", "AFR_MAF", "ALLELE_NUM", "AMR_MAF", "ASN_MAF", "Allele",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment