From db1a67557fad2f4dbee6998c71e33b50f0770396 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 29 Mar 2016 13:32:26 +0200 Subject: [PATCH] Fixed sorting issue --- .../nl/lumc/sasc/biopet/tools/GensToVcf.scala | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala index d02483747..143228366 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala @@ -83,7 +83,7 @@ object GensToVcf extends ToolCommand { lazy val fastaFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(cmdArgs.referenceFasta, true, true) case class Line(genotype: String, info: Option[String]) - def lineIt: Iterator[Line] = { + val lineIt: Iterator[Line] = { val it = infoIt match { case Some(x) => genotypeIt.zip(x).map(x => Line(x._1, Some(x._2))) case _ => genotypeIt.map(x => Line(x, None)) @@ -92,13 +92,20 @@ object GensToVcf extends ToolCommand { if (cmdArgs.sortInput) { logger.info("Start Sorting input files") val list = it.toList - val pos = list.map(_.genotype.split(" ")(2).toInt) + val pos = list.map{ line => + val values = line.genotype.split(" ") + val p = values(2).toInt + val alt = values(4) + if (alt == "-") p - 1 + else p + } list.zip(pos).sortBy(_._2).map(_._1).toIterator } else it } logger.info("Start processing genotypes") + var count = 0L for (line <- lineIt) { val genotypeValues = line.genotype.split(" ") val (start, end, ref, alt) = { @@ -141,8 +148,12 @@ object GensToVcf extends ToolCommand { val id = genotypeValues(1) if (id.startsWith(cmdArgs.contig + ":")) writer.add(builder.make()) else writer.add(builder.id(id).make()) + count += 1 + if (count % 10000 == 0) logger.info(s"$count lines processed") } + logger.info(s"$count lines processed") + writer.close() logger.info("Done") -- GitLab