From 4e6b4240be8e8df089fba43c4a01776535173746 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 29 Mar 2016 13:38:37 +0200 Subject: [PATCH] Added auto `sortInput` on retry --- .../lumc/sasc/biopet/extensions/tools/GensToVcf.scala | 10 +++++++++- .../scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala | 11 +++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala index c620bc161..bab7b8fd5 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GensToVcf.scala @@ -45,6 +45,8 @@ class GensToVcf(val root: Configurable) extends ToolCommandFunction with Referen var contig: String = _ + var sortInput: Boolean = false + override def defaultCoreMemory = 5.0 override def beforeGraph(): Unit = { @@ -54,13 +56,19 @@ class GensToVcf(val root: Configurable) extends ToolCommandFunction with Referen if (outputVcf.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputVcf.getAbsolutePath + ".tbi") } + override def setupRetry(): Unit = { + super.setupRetry() + sortInput = true + } + override def cmdLine = super.cmdLine + required("--inputGenotypes", inputGens) + required("--inputInfo", inputInfo) + required("--outputVcf", outputVcf) + optional("--contig", contig) + required("--referenceFasta", reference) + - required("--samplesFile", samplesFile) + required("--samplesFile", samplesFile) + + conditional(sortInput, "--sortInput") } diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala index 143228366..e6f37c163 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GensToVcf.scala @@ -5,7 +5,7 @@ import java.util import htsjdk.samtools.reference.{ FastaSequenceFile, ReferenceSequenceFileFactory } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } -import htsjdk.variant.variantcontext.{VariantContext, Allele, GenotypeBuilder, VariantContextBuilder} +import htsjdk.variant.variantcontext.{ VariantContext, Allele, GenotypeBuilder, VariantContextBuilder } import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.utils.ToolCommand @@ -44,7 +44,7 @@ object GensToVcf extends ToolCommand { opt[String]('c', "contig") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(contig = x) } text "contig of impute file" - opt[Unit]("sort") maxOccurs 1 action { (x, c) => + opt[Unit]("sortInput") maxOccurs 1 action { (x, c) => c.copy(sortInput = true) } text "In memory sorting" } @@ -86,13 +86,13 @@ object GensToVcf extends ToolCommand { val lineIt: Iterator[Line] = { val it = infoIt match { case Some(x) => genotypeIt.zip(x).map(x => Line(x._1, Some(x._2))) - case _ => genotypeIt.map(x => Line(x, None)) + case _ => genotypeIt.map(x => Line(x, None)) } if (cmdArgs.sortInput) { logger.info("Start Sorting input files") val list = it.toList - val pos = list.map{ line => + val pos = list.map { line => val values = line.genotype.split(" ") val p = values(2).toInt val alt = values(4) @@ -100,8 +100,7 @@ object GensToVcf extends ToolCommand { else p } list.zip(pos).sortBy(_._2).map(_._1).toIterator - } - else it + } else it } logger.info("Start processing genotypes") -- GitLab