Skip to content
Snippets Groups Projects
Commit 9a2c81d5 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

added counter

parent 7dafdb1b
No related branches found
No related tags found
No related merge requests found
...@@ -126,13 +126,17 @@ object VEPNormalizer extends ToolCommand { ...@@ -126,13 +126,17 @@ object VEPNormalizer extends ToolCommand {
logger.info(s"""You have selected mode $mode""") logger.info(s"""You have selected mode $mode""")
logger.info("Start processing records") logger.info("Start processing records")
var counter = 0
for (record <- reader) { for (record <- reader) {
mode match { mode match {
case "explode" => explodeTranscripts(record, newInfos, removeCsq).foreach(vc => writer.add(vc)) case "explode" => explodeTranscripts(record, newInfos, removeCsq).foreach(vc => writer.add(vc))
case "standard" => writer.add(standardTranscripts(record, newInfos, removeCsq)) case "standard" => writer.add(standardTranscripts(record, newInfos, removeCsq))
case _ => throw new IllegalArgumentException("Something odd happened!") case _ => throw new IllegalArgumentException("Something odd happened!")
} }
counter += 1
if (counter % 100000 == 0) logger.info(counter + " variants processed")
} }
logger.info("done: " + counter + " variants processed")
} }
/** /**
...@@ -184,51 +188,42 @@ object VEPNormalizer extends ToolCommand { ...@@ -184,51 +188,42 @@ object VEPNormalizer extends ToolCommand {
* @return An array with the new records * @return An array with the new records
*/ */
def explodeTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): Array[VariantContext] = { def explodeTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): Array[VariantContext] = {
val csq = record.getAttributeAsString("CSQ", "unknown") val arti = parseCsq(record)
val builder = {
if (removeCsq) new VariantContextBuilder(record).rmAttribute("CSQ")
else new VariantContextBuilder(record)
}
// atributes for each transcript (transcript)(csq field index)
val arti = csq.
stripPrefix("[").
stripSuffix("]").
split(",").map(_.split("""\|"""))
for (transcript <- arti) yield { for (transcript <- arti) yield {
(for ( (for (
fieldId <- 0 until csqInfos.size if transcript.isDefinedAt(fieldId) && !transcript(fieldId).isEmpty fieldId <- 0 until csqInfos.size if transcript.isDefinedAt(fieldId);
) yield csqInfos(fieldId) -> transcript(fieldId).trim) value = transcript(fieldId).trim if value.nonEmpty
) yield csqInfos(fieldId) -> value)
.filterNot(_._2.isEmpty) .filterNot(_._2.isEmpty)
.foldLeft(builder)((builder, artibute) => builder.attribute(artibute._1, artibute._2)) .foldLeft(createBuilder(record, removeCsq))((builder, artibute) => builder.attribute(artibute._1, artibute._2))
.make() .make()
} }
} }
def standardTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): VariantContext = { def standardTranscripts(record: VariantContext, csqInfos: Array[String], removeCsq: Boolean): VariantContext = {
val csq = record.getAttributeAsString("CSQ", "unknown") val arti = parseCsq(record)
val builder = { (for (fieldId <- 0 until csqInfos.size) yield csqInfos(fieldId) -> {
if (removeCsq) new VariantContextBuilder(record).rmAttribute("CSQ") for (
else new VariantContextBuilder(record) transcript <- arti if transcript.isDefinedAt(fieldId);
} value = transcript(fieldId).trim if value.nonEmpty
) yield value
}).filter(_._2.nonEmpty)
.foldLeft(createBuilder(record, removeCsq))((builder, attribute) => builder.attribute(attribute._1, attribute._2))
.make()
}
protected def createBuilder(record: VariantContext, removeCsq: Boolean) = {
if (removeCsq) new VariantContextBuilder(record).rmAttribute("CSQ")
else new VariantContextBuilder(record)
}
// atributes for each transcript (transcript)(csq field index) protected def parseCsq(record: VariantContext) = {
val arti = csq. record.getAttributeAsString("CSQ", "unknown").
stripPrefix("["). stripPrefix("[").
stripSuffix("]"). stripSuffix("]").
split(",").map(_.split("""\|""")) split(",").map(_.split("""\|"""))
(for (fieldId <- 0 until csqInfos.size) yield csqInfos(fieldId) -> {
for (
transcript <- arti if transcript.isDefinedAt(fieldId) && !transcript(fieldId).isEmpty
) yield transcript(fieldId).trim
})
.filterNot(_._2.isEmpty)
.foldLeft(builder)((builder, artibute) => builder.attribute(artibute._1, artibute._2))
.make()
} }
case class Args(inputVCF: File = null, case class Args(inputVCF: File = null,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment