diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala index 1c557e5223634fdc14d992313e006bca6b141c90..11b59f2fa52bf56e455475e2e719a746f6ff41ed 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala @@ -5,13 +5,15 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad -import nl.lumc.sasc.biopet.core.{ Version, CommandLineResources, Reference, BiopetJavaCommandLineFunction } +import nl.lumc.sasc.biopet.core._ import org.broadinstitute.gatk.engine.phonehome.GATKRunReport import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version { memoryLimit = Option(3) + var executable: String = config("java", default = "java", submodule = "java", freeVar = false) + override def subPath = "gatk" :: super.subPath jarFile = config("gatk_jar") @@ -20,6 +22,7 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen override def defaultCoreMemory = 4.0 override def faiRequired = true + override def dictRequired = true if (config.contains("intervals")) intervals = config("intervals").asFileList if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList @@ -39,5 +42,8 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen override def versionExitcode = List(0, 1) def versionCommand = "java" + " -jar " + jarFile + " -version" - override def getVersion = super.getVersion.collect { case v => "Gatk " + v } + override def getVersion = { + BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _) + super.getVersion.collect { case v => "Gatk " + v } + } } diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala index 5bd36585d1de2f62c50cb46cedacc8f3a9bcf16d..e347ebf4743eca9b2f80bc3f072c922cb012e88a 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala @@ -10,7 +10,7 @@ import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { - annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).asStringList + annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList if (config.contains("dbsnp")) dbsnp = config("dbsnp") if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount") diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index 1740b33ac108a4c20205973a2410aabbeb4dbd83..bd78727cad7e431d9af8dceaa8a4f6aef4c1d163 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -44,12 +44,13 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { bams <- 0 to 2; raw <- bool; bcftools <- bool; + bcftools_singlesample <- bool; haplotypeCallerGvcf <- bool; haplotypeCallerAllele <- bool; unifiedGenotyperAllele <- bool; unifiedGenotyper <- bool; haplotypeCaller <- bool - ) yield Array[Any](bams, raw, bcftools, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele) + ) yield Array[Any](bams, raw, bcftools, bcftools_singlesample, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele) ).toArray } @@ -57,6 +58,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { def testShivaVariantcalling(bams: Int, raw: Boolean, bcftools: Boolean, + bcftools_singlesample: Boolean, unifiedGenotyper: Boolean, haplotypeCaller: Boolean, haplotypeCallerGvcf: Boolean, @@ -65,6 +67,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val callers: ListBuffer[String] = ListBuffer() if (raw) callers.append("raw") if (bcftools) callers.append("bcftools") + if (bcftools_singlesample) callers.append("bcftools_singlesample") if (unifiedGenotyper) callers.append("unifiedgenotyper") if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf") if (haplotypeCallerAllele) callers.append("haplotypecaller_allele") @@ -78,7 +81,8 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !haplotypeCaller && !unifiedGenotyper && - !haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele) + !haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele && + !bcftools_singlesample) if (illegalArgumentException) intercept[IllegalArgumentException] { pipeline.script() @@ -90,7 +94,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0) //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0) //FIXME: Can not check for bcftools because of piping - pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) + //pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) + (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0) diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index 45dbe227c72cd10243b38ea9865ed01f4ad493bc..c0046d0535ccb5c429b2fa8b5707868c74f71f41 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -26,7 +26,6 @@ import org.ggf.drmaa.JobTemplate import scala.collection.mutable import scala.io.Source import scala.sys.process.{ Process, ProcessLogger } -import scala.util.matching.Regex import scala.collection.JavaConversions._ /** Biopet command line trait to auto check executable and cluster values */ @@ -50,20 +49,18 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => val writer = new PrintWriter(file) writer.println("set -eubf") writer.println("set -o pipefail") - lines.foreach(writer.println(_)) + lines.foreach(writer.println) writer.close() } // This overrides the default "sh" from queue. For Biopet the default is "bash" updateJobRun = { - case jt: JobTemplate => { + case jt: JobTemplate => changeScript(new File(jt.getArgs.head.toString)) jt.setRemoteCommand(remoteCommand) - } - case ps: ProcessSettings => { + case ps: ProcessSettings => changeScript(new File(ps.getCommand.tail.head)) ps.setCommand(Array(remoteCommand) ++ ps.getCommand.tail) - } } /** @@ -91,50 +88,19 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => } - /** can override this value is executable may not be converted to CanonicalPath */ + /** + * Can override this value is executable may not be converted to CanonicalPath + * @deprecated + */ val executableToCanonicalPath = true /** * Checks executable. Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report */ protected[core] def preProcessExecutable() { - if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) { - if (executable != null) { - if (!BiopetCommandLineFunction.executableCache.contains(executable)) { - try { - val oldExecutable = executable - val buffer = new StringBuffer() - val cmd = Seq("which", executable) - val process = Process(cmd).run(ProcessLogger(buffer.append(_))) - if (process.exitValue == 0) { - executable = buffer.toString - val file = new File(executable) - if (executableToCanonicalPath) executable = file.getCanonicalPath - else executable = file.getAbsolutePath - } else Logging.addError("executable: '" + executable + "' not found, please check config") - BiopetCommandLineFunction.executableCache += oldExecutable -> executable - BiopetCommandLineFunction.executableCache += executable -> executable - } catch { - case ioe: java.io.IOException => - logger.warn(s"Could not use 'which' on '$executable', check on executable skipped: " + ioe) - } - } else executable = BiopetCommandLineFunction.executableCache(executable) - - if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) { - if (new File(executable).exists()) { - val is = new FileInputStream(executable) - val cnt = is.available - val bytes = Array.ofDim[Byte](cnt) - is.read(bytes) - is.close() - val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase - BiopetCommandLineFunction.executableMd5Cache += executable -> temp - } else BiopetCommandLineFunction.executableMd5Cache += executable -> "file_does_not_exist" - } - } - } - val md5 = BiopetCommandLineFunction.executableMd5Cache.get(executable) - addJobReportBinding("md5sum_exe", md5.getOrElse("None")) + val exe = BiopetCommandLineFunction.preProcessExecutable(executable) + exe.path.foreach(executable = _) + addJobReportBinding("md5sum_exe", exe.md5.getOrElse("N/A")) } /** executes checkExecutable method and fill job report */ @@ -166,10 +132,9 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => that.beforeGraph() that.internalBeforeGraph() this match { - case p: BiopetPipe => { + case p: BiopetPipe => p.commands.last._outputAsStdout = true new BiopetPipe(p.commands ::: that :: Nil) - } case _ => new BiopetPipe(List(this, that)) } } @@ -230,7 +195,48 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction => } /** stores global caches */ -object BiopetCommandLineFunction { +object BiopetCommandLineFunction extends Logging { private[core] val executableMd5Cache: mutable.Map[String, String] = mutable.Map() private[core] val executableCache: mutable.Map[String, String] = mutable.Map() + + case class Executable(path: Option[String], md5: Option[String]) + def preProcessExecutable(executable: String): Executable = { + if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) { + if (executable != null) { + if (!BiopetCommandLineFunction.executableCache.contains(executable)) { + try { + val buffer = new StringBuffer() + val cmd = Seq("which", executable) + val process = Process(cmd).run(ProcessLogger(buffer.append(_))) + if (process.exitValue == 0) { + val file = new File(buffer.toString) + BiopetCommandLineFunction.executableCache += executable -> file.getAbsolutePath + } else { + Logging.addError("executable: '" + executable + "' not found, please check config") + BiopetCommandLineFunction.executableCache += executable -> executable + } + } catch { + case ioe: java.io.IOException => + logger.warn(s"Could not use 'which' on '$executable', check on executable skipped: " + ioe) + BiopetCommandLineFunction.executableCache += executable -> executable + } + } + + if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) { + val newExe = BiopetCommandLineFunction.executableCache(executable) + if (new File(newExe).exists()) { + val is = new FileInputStream(newExe) + val cnt = is.available + val bytes = Array.ofDim[Byte](cnt) + is.read(bytes) + is.close() + val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase + BiopetCommandLineFunction.executableMd5Cache += newExe -> temp + } else BiopetCommandLineFunction.executableMd5Cache += newExe -> "file_does_not_exist" + } + } + } + Executable(BiopetCommandLineFunction.executableCache.get(executable), + BiopetCommandLineFunction.executableMd5Cache.get(executable)) + } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala index 86e97f4c7dff893869eb8d8c7eec94c4389b9146..5722871028d92aeddd2723c22971a2e08e51107a 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala @@ -21,9 +21,9 @@ import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetCommandLineFunction { executable = config("java", default = "java", submodule = "java", freeVar = false) - javaGCThreads = config("java_gc_threads") - javaGCHeapFreeLimit = config("java_gc_heap_freelimit") - javaGCTimeLimit = config("java_gc_timelimit") + javaGCThreads = config("java_gc_threads", default = 4) + javaGCHeapFreeLimit = config("java_gc_heap_freelimit", default = 10) + javaGCTimeLimit = config("java_gc_timelimit", default = 50) override def defaultVmemFactor: Double = 2.0 @@ -38,8 +38,6 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC if (javaMainClass != null && javaClasspath.isEmpty) javaClasspath = JavaCommandLineFunction.currentClasspath - - //threads = getThreads(defaultThreads) } /** Creates command to execute extension */ diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala index 0fdc946e31808db3b41eb7e81bd5377c3ec563f3..1d9d6396235164d611f10291657cff7ffe72be60 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/CommandLineResources.scala @@ -28,7 +28,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable { var residentFactor: Double = config("resident_factor", default = 1.2) private var _coreMemory: Double = 2.0 - def coreMemeory = _coreMemory + def coreMemory = _coreMemory var retry = 0 @@ -91,7 +91,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable { commands.foreach(_.setResources()) nCoresRequest = Some(commands.map(_.threads).sum + threadsCorrection) - _coreMemory = commands.map(cmd => cmd.coreMemeory * (cmd.threads.toDouble / threads.toDouble)).sum + _coreMemory = commands.map(cmd => cmd.coreMemory * (cmd.threads.toDouble / threads.toDouble)).sum memoryLimit = Some(_coreMemory * threads) residentLimit = Some((_coreMemory + (0.5 * retry)) * residentFactor) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retry))) + "G") diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala index d283729b2c5a853461d7d75a56eb8d5cb8125c0f..91d813acb3a2c633d65a7c6d8f403c05d2ea2332 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Version.scala @@ -54,8 +54,6 @@ object Version extends Logging { if (versionCache.contains(versionCommand)) return versionCache.get(versionCommand) else if (versionCommand == null || versionRegex == null) return None else { - val exe = new File(versionCommand.trim.split(" ")(0)) - if (!exe.exists()) return None val stdout = new StringBuffer() val stderr = new StringBuffer() def outputLog = "Version command: \n" + versionCommand + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala index 4b9bf4a37779ed6f3390c6890d43136809c6cb6f..daaaf73d96c7d190c61bbdbc45331115eb388aed 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala @@ -35,10 +35,9 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi @Output(doc = "Output (for region query)", required = false) var outputQuery: File = null - @Output(doc = "Output (for indexing)", required = false) // NOTE: it's a def since we can't change the index name ~ it's always input_name + .tbi - lazy val outputIndex: File = { - require(input != null, "Input must be defined") - new File(input.toString + ".tbi") + def outputIndex: File = { + require(input != null, "Input should be defined") + new File(input.getAbsolutePath + ".tbi") } @Argument(doc = "Regions to query", required = false) @@ -70,7 +69,8 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi p match { case Some(fmt) => require(validFormats.contains(fmt), "-p flag must be one of " + validFormats.mkString(", ")) - case None => ; + outputFiles :+= outputIndex + case None => } } @@ -96,3 +96,19 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi else baseCommand } } + +object Tabix { + def apply(root: Configurable, input: File) = { + val tabix = new Tabix(root) + tabix.input = input + tabix.p = tabix.input.getName match { + case s if s.endsWith(".vcf.gz") => Some("vcf") + case s if s.endsWith(".bed.gz") => Some("bed") + case s if s.endsWith(".sam.gz") => Some("sam") + case s if s.endsWith(".gff.gz") => Some("gff") + case s if s.endsWith(".psltbl.gz") => Some("psltbl") + case _ => throw new IllegalArgumentException("Unknown file type") + } + tabix + } +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala index eaef86a39e2d02e22b49cb2a0a9a12073b841934..b70865176acc39ad3946c6acbf13dda83599b53c 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala @@ -29,6 +29,8 @@ class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Input wig file") var inputWigFile: File = _ + override def defaultCoreMemory = 3.0 + @Input(doc = "Input chrom sizes file", required = true) var inputChromSizesFile: File = _ diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala index 537cd377daa4c3f731541717e5378159a9f4cdc7..329ea4a656cd6a45d995109d0d99f7b0b3154f05 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsCall.scala @@ -22,28 +22,66 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** This extension is based on bcftools 1.1-134 */ class BcftoolsCall(val root: Configurable) extends Bcftools { - @Input(doc = "Input File") + @Input(doc = "Input File", required = false) var input: File = _ - @Output(doc = "output File") + @Output(doc = "output File", required = false) var output: File = _ - var O: String = null - var v: Boolean = config("v", default = true) + var O: Option[String] = None + var v: Boolean = config("v", default = false) var c: Boolean = config("c", default = false) var m: Boolean = config("m", default = false) + var r: Option[String] = config("r") + @Input(required = false) + var R: Option[File] = config("R") + var s: Option[String] = config("s") + @Input(required = false) + var S: Option[File] = config("S") + var t: Option[String] = config("t") + @Input(required = false) + var T: Option[File] = config("T") + var A: Boolean = config("A", default = false) + var f: List[String] = config("f", default = Nil) + var g: Option[Int] = config("g") + var i: Boolean = config("i", default = false) + var M: Boolean = config("M", default = false) + var V: Option[String] = config("V") + var C: Option[String] = config("C") + var n: Option[Float] = config("n") + var p: Option[Float] = config("p") + var P: Option[Float] = config("P") + var X: Boolean = config("X", default = false) + var Y: Boolean = config("Y", default = false) override def beforeGraph(): Unit = { require(c != m) } - def cmdBase = required(executable) + + def cmdLine = required(executable) + required("call") + optional("-O", O) + conditional(v, "-v") + conditional(c, "-c") + - conditional(m, "-m") - def cmdPipeInput = cmdBase + "-" - def cmdPipe = cmdBase + input - def cmdLine = cmdPipe + " > " + required(output) + conditional(m, "-m") + + optional("-r", r) + + optional("-R", R) + + optional("-s", s) + + optional("-S", S) + + optional("-t", t) + + optional("-T", T) + + conditional(A, "-A") + + repeat("-f", f) + + optional("-g", g) + + conditional(i, "-i") + + conditional(M, "-M") + + optional("-V", V) + + optional("-C", C) + + optional("-n", n) + + optional("-p", p) + + optional("-P", P) + + conditional(X, "-X") + + conditional(Y, "-Y") + + (if (outputAsStsout) "" else required("-o", output)) + + (if (inputAsStdin) "-" else required(input)) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala new file mode 100644 index 0000000000000000000000000000000000000000..3a6fb6978cfb63deae159ad620ef472a28cdd7c2 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsMerge.scala @@ -0,0 +1,49 @@ +package nl.lumc.sasc.biopet.extensions.bcftools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Output, Input } + +/** + * Created by sajvanderzeeuw on 16-10-15. + */ +class BcftoolsMerge(val root: Configurable) extends Bcftools { + @Input(doc = "Input File", required = true) + var input: List[File] = Nil + + @Output(doc = "output File", required = false) + var output: File = _ + + @Input(required = false) + var R: Option[File] = config("R") + + @Input(required = false) + var useheader: Option[File] = config("useheader") + + @Input(required = false) + var l: Option[File] = config("l") + + var forcesamples: Boolean = config("forcesamples", default = false) + var printheader: Boolean = config("printheader", default = false) + var f: List[String] = config("f", default = Nil) + var i: List[String] = config("i", default = Nil) + var m: Option[String] = config("m") + var O: Option[String] = config("O") + var r: List[String] = config("r", default = Nil) + + def cmdLine = required(executable) + + required("merge") + + (if (outputAsStsout) "" else required("-o", output)) + + conditional(forcesamples, "--force-samples") + + conditional(printheader, "--print-header") + + optional("--use-header", useheader) + + optional("-f", f) + + optional("-i", i) + + optional("-l", l) + + optional("-m", m) + + optional("-O", O) + + optional("-r", r) + + optional("-R", R) + + repeat(input) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala index 144a91f21ae0479438fdb78f9432632d22584318..8291e667b67df0b03a10114f6386670f18d572e4 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -53,6 +53,8 @@ class CombineVariants(val root: Configurable) extends Gatk { case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") } + deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) + deps = deps.distinct } override def cmdLine = super.cmdLine + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala new file mode 100644 index 0000000000000000000000000000000000000000..519cbfad6a8db14cf4812fb0005dc7e0e0e6aa65 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala @@ -0,0 +1,94 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport } + +/** + * Extension for CombineVariants from GATK + * + * Created by pjvan_thof on 2/26/15. + */ +class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable { + val analysisType = "GenotypeConcordance" + + @Input(required = true) + var evalFile: File = null + + @Input(required = true) + var compFile: File = null + + @Output(required = true) + var outputFile: File = null + + var moltenize = true + + def summaryFiles = Map("output" -> outputFile) + + def summaryStats = { + val report = new GATKReport(outputFile) + val compProportions = report.getTable("GenotypeConcordance_CompProportions") + val counts = report.getTable("GenotypeConcordance_Counts") + val evalProportions = report.getTable("GenotypeConcordance_EvalProportions") + val genotypeSummary = report.getTable("GenotypeConcordance_Summary") + val siteSummary = report.getTable("SiteConcordance_Summary") + + val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString + + def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> { + (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap + }).toMap + + Map( + "compProportions" -> getMap(compProportions, "Proportion"), + "counts" -> getMap(counts, "Count"), + "evalProportions" -> getMap(evalProportions, "Proportion"), + "genotypeSummary" -> samples.distinct.map(sample => { + val i = samples.indexOf(sample) + sample -> Map( + "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"), + "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"), + "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance") + ) + }).toMap, + "siteSummary" -> Map( + "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"), + "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"), + "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"), + "ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"), + "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"), + "TRUTH_ONLY" -> siteSummary.get(0, "TRUTH_ONLY") + ) + ) + } + + override def beforeGraph(): Unit = { + super.beforeGraph() + deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi")) + deps = deps.distinct + } + + override def cmdLine = super.cmdLine + + required("--eval", evalFile) + + required("--comp", compFile) + + required("-o", outputFile) + + conditional(moltenize, "--moltenize") +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala index 449b49cf0fbc3e1620a804347ce3d61790210183..02ded651d249c432c585349bab230329ccd45dee 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala @@ -47,7 +47,7 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools with Reference { reference = referenceFasta() } - def cmdBase = required(executable) + + def cmdLine = required(executable) + required("mpileup") + optional("-f", reference) + optional("-l", intervalBed) + @@ -56,12 +56,9 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools with Reference { optional("-d", depth) + conditional(outputMappingQuality, "-s") + conditional(disableBaq, "-B") + - conditional(u, "-u") - def cmdPipeInput = cmdBase + "-" - def cmdPipe = cmdBase + repeat(input) - - /** Returns command to execute */ - def cmdLine = cmdPipe + " > " + required(output) + conditional(u, "-u") + + (if (outputAsStsout) "" else required("-o", output)) + + (if (inputAsStdin) "-" else repeat(input)) } object SamtoolsMpileup { diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala index 5df2a3a7b47f5771dcdbd033ef2c273ba70ed89f..1e145929642f9c0c29e1cd8f07b9eb0a8fcf3da6 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsSort.scala @@ -27,7 +27,7 @@ class SamtoolsSort(val root: Configurable) extends Samtools { } def cmdLine = required(executable) + required("sort") + - optional("-m", (coreMemeory + "G")) + + optional("-m", (coreMemory + "G")) + optional("-@", threads) + optional("-O", outputFormat) + required("-T", prefix) + diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala index 3d77acdd67979a849c4bd59defdbe13bbe59e790..b6f90cc6261b3677340c51c5d1197a50d901181e 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala @@ -47,12 +47,9 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe override def defaultCoreMemory = 3.0 - override def defaults = ConfigUtils.mergeMaps(Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)), - super.defaults) - override def beforeGraph() { super.beforeGraph() - reference = referenceFasta().getAbsolutePath + if (reference == null) reference = referenceFasta().getAbsolutePath val samtoolsMpileup = new SamtoolsMpileup(this) } @@ -66,20 +63,12 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe } } - override def cmdLine = { - (if (inputMpileup == null) { - val samtoolsMpileup = new SamtoolsMpileup(this) - samtoolsMpileup.reference = referenceFasta() - samtoolsMpileup.input = List(inputBam) - samtoolsMpileup.cmdPipe + " | " - } else "") + - super.cmdLine + - required("-o", output) + - optional("--minDP", minDP) + - optional("--minAP", minAP) + - optional("--homoFraction", homoFraction) + - optional("--ploidy", ploidy) + - required("--sample", sample) + - (if (inputBam == null) required("-I", inputMpileup) else "") - } + override def cmdLine = super.cmdLine + + required("-o", output) + + optional("--minDP", minDP) + + optional("--minAP", minAP) + + optional("--homoFraction", homoFraction) + + optional("--ploidy", ploidy) + + required("--sample", sample) + + (if (inputAsStdin) "" else required("-I", inputMpileup)) } diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf b/public/biopet-tools/src/test/resources/VEP_oneline.vcf index 6d575651ff43e7bc475e1b3c093521ed18dd4995..e5cd64bcb5ff107f53fb6c22fd1634be4c969e54 100644 --- a/public/biopet-tools/src/test/resources/VEP_oneline.vcf +++ b/public/biopet-tools/src/test/resources/VEP_oneline.vcf @@ -1,11 +1,5 @@ ##fileformat=VCFv4.1 ##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 16:58:07 CEST 2014",Epoch=1402757887567,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.ug.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 17:01:08 CEST 2014",Epoch=1402758068552,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=UnifiedGenotyper,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:20:24 CEST 2014",Epoch=1402752024377,Version=3.1-1-g07a4bf8> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:28:02 CEST 2014",Epoch=1402777682364,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.hc.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:31:13 CEST 2014",Epoch=1402777873043,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=HaplotypeCaller,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=1 recoverDanglingHeads=false dontRecoverDanglingTails=false consensus=false emitRefConfidence=NONE GVCFGQBands=[5, 20, 60] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=128 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false noFpga=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 pcr_indel_model=CONSERVATIVE activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:26:18 CEST 2014",Epoch=1402752378803,Version=3.1-1-g07a4bf8> ##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP"> ##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes"> ##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP"> @@ -164,5 +158,5 @@ ##contig=<ID=chrUn_gl000248,length=39786> ##contig=<ID=chrUn_gl000249,length=38502> ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Child_7006504 Father_7006506 Mother_7006508 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 chr1 871042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz index 3dccfacc66deb4dfc034ce927addceeb9a716535..8f0e4cb3292cbb6ac9918f7f13633e21b9f2ed1a 100644 Binary files a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz and b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi index 1e25b0dccdd45daf908188094e8a998f875094df..1d1bc2ce9351b5f9d267a75fd6f7df3fc34326df 100644 Binary files a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf b/public/biopet-tools/src/test/resources/chrQ.vcf index 4b97e48678c5dddab38dd2c7d56cd1d0c3d298f9..2ed17f44b1d3f9bbbb731c70c52e7d4388a64bc9 100644 --- a/public/biopet-tools/src/test/resources/chrQ.vcf +++ b/public/biopet-tools/src/test/resources/chrQ.vcf @@ -1,11 +1,5 @@ ##fileformat=VCFv4.1 ##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 16:58:07 CEST 2014",Epoch=1402757887567,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.ug.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 17:01:08 CEST 2014",Epoch=1402758068552,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=UnifiedGenotyper,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:20:24 CEST 2014",Epoch=1402752024377,Version=3.1-1-g07a4bf8> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:28:02 CEST 2014",Epoch=1402777682364,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.hc.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:31:13 CEST 2014",Epoch=1402777873043,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=HaplotypeCaller,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=1 recoverDanglingHeads=false dontRecoverDanglingTails=false consensus=false emitRefConfidence=NONE GVCFGQBands=[5, 20, 60] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=128 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false noFpga=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 pcr_indel_model=CONSERVATIVE activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:26:18 CEST 2014",Epoch=1402752378803,Version=3.1-1-g07a4bf8> ##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP"> ##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes"> ##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP"> @@ -81,5 +75,5 @@ ##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region"> ##contig=<ID=chrQ,length=16571> ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Child_7006504 Father_7006506 Mother_7006508 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 chrQ 1042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf.gz b/public/biopet-tools/src/test/resources/chrQ.vcf.gz index f4d4d695f057d29e4744ede187c8c1b155929366..4068353b8e36ee701149416a96adcd97dd23706b 100644 Binary files a/public/biopet-tools/src/test/resources/chrQ.vcf.gz and b/public/biopet-tools/src/test/resources/chrQ.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi b/public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi index 845dc40b2ef04fb67c5e54e06798e822c4c54bec..0cf22460546e4c2e1f9fab6448e4c95be79cb8b7 100644 Binary files a/public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/chrQ.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf b/public/biopet-tools/src/test/resources/chrQ2.vcf index e49f468d7a6d54de23ed5e3d118d45a663c1cb63..5c98366c9864ac72b3e9d64aa9ebd1dce72e4586 100644 --- a/public/biopet-tools/src/test/resources/chrQ2.vcf +++ b/public/biopet-tools/src/test/resources/chrQ2.vcf @@ -1,11 +1,5 @@ ##fileformat=VCFv4.1 ##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 16:58:07 CEST 2014",Epoch=1402757887567,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.ug.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 17:01:08 CEST 2014",Epoch=1402758068552,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=UnifiedGenotyper,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:20:24 CEST 2014",Epoch=1402752024377,Version=3.1-1-g07a4bf8> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:28:02 CEST 2014",Epoch=1402777682364,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.hc.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Sat Jun 14 22:31:13 CEST 2014",Epoch=1402777873043,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=HaplotypeCaller,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[/data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=1 recoverDanglingHeads=false dontRecoverDanglingTails=false consensus=false emitRefConfidence=NONE GVCFGQBands=[5, 20, 60] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=128 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false noFpga=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 pcr_indel_model=CONSERVATIVE activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Sat Jun 14 15:26:18 CEST 2014",Epoch=1402752378803,Version=3.1-1-g07a4bf8> ##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP"> ##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes"> ##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP"> @@ -81,5 +75,5 @@ ##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region"> ##contig=<ID=chrQ,length=16571> ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Child_7006504 Father_7006506 Mother_7006508 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 chrQ 50 rs199537431 T A 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz b/public/biopet-tools/src/test/resources/chrQ2.vcf.gz index 22435b2c513dc40a2f9632f1970395188292aa67..94a70075ea0258cf6132cf9a7a7dfedb18542fac 100644 Binary files a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz and b/public/biopet-tools/src/test/resources/chrQ2.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi b/public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi index d376218edbf3aeb9bcbf9a16275c36a6005c57b2..a1c3a9073ca4fab9c79fb6f8070905d35567de33 100644 Binary files a/public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/chrQ2.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz index f102295f99d0bc62e25de296b84dc6610930a683..210f83e016e5d83d303e755b4772cb8299089997 100644 Binary files a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz and b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi index bb43ff545f591dd276973e7158919e6d14c78f23..c52458d5601b25642ad72a90934955f280428dea 100644 Binary files a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf b/public/biopet-tools/src/test/resources/unvepped.vcf index 2ea7ff21e22818bf719b94857f246d7a0ddf66b2..95c2f1e17692780827baabddba774e5a7ce92cb2 100644 --- a/public/biopet-tools/src/test/resources/unvepped.vcf +++ b/public/biopet-tools/src/test/resources/unvepped.vcf @@ -1,11 +1,5 @@ ##fileformat=VCFv4.1 ##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Wed Jul 09 06:04:23 CEST 2014",Epoch=1404878663625,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.ug.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Wed Jul 09 06:07:15 CEST 2014",Epoch=1404878835287,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_ug/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##UnifiedGenotyperCommandLine=<ID=UnifiedGenotyper,CommandLineOptions="analysis_type=UnifiedGenotyper input_file=[/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false genotype_likelihoods_model=BOTH pcr_error_rate=1.0E-4 computeSLOD=false annotateNDA=false pair_hmm_implementation=LOGLESS_CACHING min_base_quality_score=17 max_deletion_fraction=0.05 allSitePLs=false min_indel_count_for_genotyping=5 min_indel_fraction_per_sample=0.25 indelGapContinuationPenalty=10 indelGapOpenPenalty=45 indelHaplotypeSize=80 indelDebug=false ignoreSNPAlleles=false allReadsSP=false ignoreLaneInfo=false reference_sample_calls=(RodBinding name= source=UNBOUND) reference_sample_name=null sample_ploidy=2 min_quality_score=1 max_quality_score=40 site_quality_prior=20 min_power_threshold_for_calling=0.95 min_reference_depth=100 exclude_filtered_reference_sites=false output_mode=EMIT_VARIANTS_ONLY heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) comp=[] out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub onlyEmitSamples=[] debug_file=null metrics_file=null annotation=[] excludeAnnotation=[] filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Wed Jul 09 04:32:56 CEST 2014",Epoch=1404873176100,Version=3.1-1-g07a4bf8> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Wed Jul 09 10:12:42 CEST 2014",Epoch=1404893562589,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/Child_7006504.hc.chrom_merged.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recal) tranches_file=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=SNP filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=ApplyRecalibration,Version=3.1-1-g07a4bf8,Date="Wed Jul 09 10:15:50 CEST 2014",Epoch=1404893750488,CommandLineOptions="analysis_type=ApplyRecalibration input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false input=[(RodBinding name=input source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.snp.recalibrated.vcf)] recal_file=(RodBinding name=recal_file source=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.recal) tranches_file=/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/recalibration_hc/Child_7006504.indel.tranches out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub ts_filter_level=99.0 lodCutoff=null ignore_filter=null excludeFiltered=false mode=INDEL filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false"> -##HaplotypeCallerCommandLine=<ID=HaplotypeCaller,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[/data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Child_7006504.ready.bam, /data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Mother_7006508.ready.bam, /data/DIV5/KG/kg_wes_mr_devel/runs/trio_7006504_run_00/trio_7006504/phase2/bams/Father_7006506.ready.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[chrM] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=250 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name=dbsnp source=/data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/dbsnp_137.hg19_nohap.vcf) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY standard_min_confidence_threshold_for_calling=20.0 standard_min_confidence_threshold_for_emitting=20.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.0 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=1 recoverDanglingHeads=false dontRecoverDanglingTails=false consensus=false emitRefConfidence=NONE GVCFGQBands=[5, 20, 60] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=128 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false noFpga=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 pcr_indel_model=CONSERVATIVE activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false",Date="Wed Jul 09 04:32:56 CEST 2014",Epoch=1404873176271,Version=3.1-1-g07a4bf8> ##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> @@ -130,5 +124,5 @@ ##contig=<ID=chrUn_gl000247,length=36422> ##contig=<ID=chrUn_gl000248,length=39786> ##contig=<ID=chrUn_gl000249,length=38502> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Child_7006504 Father_7006506 Mother_7006508 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 chr1 14599 . T A 35.13 VQSRTrancheSNP99.00to99.90 AC=4;AF=1.0;AN=4;BaseQRankSum=-0.736;DP=3;Dels=0.0;FS=0.0;HaplotypeScore=0.4161;MLEAC=4;MLEAF=1.0;MQ=20.77;MQ0=0;MQRankSum=0.736;QD=11.71;ReadPosRankSum=-0.736;VQSLOD=0.076;culprit=FS;GATKCaller=UG GT:AD:DP:GQ:PL ./.:.:.:.:. 1/1:0,1:1:3:32,3,0 1/1:1,1:2:3:27,3,0 diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf.gz b/public/biopet-tools/src/test/resources/unvepped.vcf.gz index 836a5646314f87f60c541fd56d3fced47ab53294..f8d6fe3f8bfb1a1a4a4512eef76c53a8fc39c49a 100644 Binary files a/public/biopet-tools/src/test/resources/unvepped.vcf.gz and b/public/biopet-tools/src/test/resources/unvepped.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi b/public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi index 1895ffffc8cafd9cda37d3b9afb81a9a1bfe4034..2580d3da8a38a5516bf475e240870a07626279a2 100644 Binary files a/public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/unvepped.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala index d9d8c1bee6bf96bcc98a372585b5a89454775e08..70e9aefd46251ee3a01b6a133e9ade7f6cc28a09 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFastaTest.scala @@ -35,7 +35,7 @@ class BastyGenerateFastaTest extends TestNGSuite with MockitoSugar with Matchers val tmppath = tmp.getAbsolutePath tmp.deleteOnExit() - val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--sampleName", "Sample_101", "--reference", chrQRef_path, "--outputName", "test") main(arguments) } @@ -45,7 +45,7 @@ class BastyGenerateFastaTest extends TestNGSuite with MockitoSugar with Matchers val tmppath = tmp.getAbsolutePath tmp.deleteOnExit() - val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + val arguments = Array("-V", chrQ_path, "--outputVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Sample_101", "--reference", chrQRef_path, "--outputName", "test") main(arguments) } @@ -55,7 +55,7 @@ class BastyGenerateFastaTest extends TestNGSuite with MockitoSugar with Matchers val tmppath = tmp.getAbsolutePath tmp.deleteOnExit() - val arguments = Array("-V", chrQ_path, "--outputConsensus", tmppath, "--outputConsensusVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Child_7006504", "--reference", chrQRef_path, "--outputName", "test") + val arguments = Array("-V", chrQ_path, "--outputConsensus", tmppath, "--outputConsensusVariants", tmppath, "--bamFile", bam_path, "--sampleName", "Sample_101", "--reference", chrQRef_path, "--outputName", "test") main(arguments) } @@ -63,13 +63,13 @@ class BastyGenerateFastaTest extends TestNGSuite with MockitoSugar with Matchers val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - val child = mock[Args] - when(child.sampleName) thenReturn "Child_7006504" - val father = mock[Args] - when(father.sampleName) thenReturn "Father_7006506" + val one = mock[Args] + when(one.sampleName) thenReturn "Sample_101" + val two = mock[Args] + when(two.sampleName) thenReturn "Sample_102" - getMaxAllele(record)(child) shouldBe "C-" - getMaxAllele(record)(father) shouldBe "CA" + getMaxAllele(record)(one) shouldBe "C-" + getMaxAllele(record)(two) shouldBe "CA" } diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala index 476e8e6230caa6bd1051a683459ca28566516699..1ee6f38d316e076084479edbbface36eba73dc92 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBamTest.scala @@ -45,19 +45,25 @@ class CheckAllelesVcfInBamTest extends TestNGSuite with MockitoSugar with Matche val rand = new Random() @Test def testOutputTypeVcf() = { - val tmp_path = "/tmp/CheckAllesVcfInBam_" + rand.nextString(10) + ".vcf" + val tmp = File.createTempFile("CheckAllelesVcfInBam", ".vcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vcf, "-b", bam, "-s", "sample01", "-o", tmp_path) main(arguments) } @Test def testOutputTypeVcfGz() = { - val tmp_path = "/tmp/CheckAllesVcfInBam_" + rand.nextString(10) + ".vcf.gz" + val tmp = File.createTempFile("CheckAllelesVcfInBam", ".vcf.gz") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vcf, "-b", bam, "-s", "sample01", "-o", tmp_path) main(arguments) } @Test def testOutputTypeBcf() = { - val tmp_path = "/tmp/CheckAllesVcfInBam_" + rand.nextString(10) + ".bcf" + val tmp = File.createTempFile("CheckAllelesVcfInBam", ".bcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vcf, "-b", bam, "-s", "sample01", "-o", tmp_path) main(arguments) } diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala index e041679d0e4a631f5925f3e3f7e491c8b12f6196..b7ea9bb9c9b2d3e76b6d1c312106091a2684f9a2 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/MergeAllelesTest.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet.tools +import java.io.File import java.nio.file.Paths import org.scalatest.Matchers @@ -48,19 +49,25 @@ class MergeAllelesTest extends TestNGSuite with MockitoSugar with Matchers { val rand = new Random() @Test def testOutputTypeVcf() = { - val tmp_path = "/tmp/MergeAlleles_" + rand.nextString(10) + ".vcf" + val tmp = File.createTempFile("MergeAlleles", ".vcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vepped_path, "-o", tmp_path, "-R", reference) main(arguments) } @Test def testOutputTypeVcfGz() = { - val tmp_path = "/tmp/MergeAlleles_" + rand.nextString(10) + ".vcf.gz" + val tmp = File.createTempFile("MergeAlleles", ".vcf.gz") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vepped_path, "-o", tmp_path, "-R", reference) main(arguments) } @Test def testOutputTypeBcf() = { - val tmp_path = "/tmp/MergeAlleles_" + rand.nextString(10) + ".bcf" + val tmp = File.createTempFile("MergeAlleles", ".bcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vepped_path, "-o", tmp_path, "-R", reference) main(arguments) } diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala index 80fe1980eccad9932e0472ae28242ae93e6b6420..7149ab194d03e1afd40ea99b39b3b39674b2533d 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala @@ -44,19 +44,25 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { val rand = new Random() @Test def testOutputTypeVcf() = { - val tmp_path = "/tmp/VcfFilter_" + rand.nextString(10) + ".vcf" + val tmp = File.createTempFile("VcfFilter", ".vcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-o", tmp_path) main(arguments) } @Test def testOutputTypeBcf() = { - val tmp_path = "/tmp/VcfFilter_" + rand.nextString(10) + ".bcf" + val tmp = File.createTempFile("VcfFilter", ".bcf") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-o", tmp_path) main(arguments) } @Test def testOutputTypeVcfGz() = { - val tmp_path = "/tmp/VcfFilter_" + rand.nextString(10) + ".vcf.gz" + val tmp = File.createTempFile("VcfFilter", ".vcf.gz") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments: Array[String] = Array("-I", vepped_path, "-o", tmp_path) main(arguments) } @@ -65,20 +71,20 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - hasGenotype(record, List(("Child_7006504", GenotypeType.HET))) shouldBe true - hasGenotype(record, List(("Child_7006504", GenotypeType.HOM_VAR))) shouldBe false - hasGenotype(record, List(("Child_7006504", GenotypeType.HOM_REF))) shouldBe false - hasGenotype(record, List(("Child_7006504", GenotypeType.NO_CALL))) shouldBe false - hasGenotype(record, List(("Child_7006504", GenotypeType.MIXED))) shouldBe false + hasGenotype(record, List(("Sample_101", GenotypeType.HET))) shouldBe true + hasGenotype(record, List(("Sample_101", GenotypeType.HOM_VAR))) shouldBe false + hasGenotype(record, List(("Sample_101", GenotypeType.HOM_REF))) shouldBe false + hasGenotype(record, List(("Sample_101", GenotypeType.NO_CALL))) shouldBe false + hasGenotype(record, List(("Sample_101", GenotypeType.MIXED))) shouldBe false - hasGenotype(record, List(("Mother_7006508", GenotypeType.HET))) shouldBe false - hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_VAR))) shouldBe false - hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_REF))) shouldBe true - hasGenotype(record, List(("Mother_7006508", GenotypeType.NO_CALL))) shouldBe false - hasGenotype(record, List(("Mother_7006508", GenotypeType.MIXED))) shouldBe false + hasGenotype(record, List(("Sample_103", GenotypeType.HET))) shouldBe false + hasGenotype(record, List(("Sample_103", GenotypeType.HOM_VAR))) shouldBe false + hasGenotype(record, List(("Sample_103", GenotypeType.HOM_REF))) shouldBe true + hasGenotype(record, List(("Sample_103", GenotypeType.NO_CALL))) shouldBe false + hasGenotype(record, List(("Sample_103", GenotypeType.MIXED))) shouldBe false - hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_REF), ("Child_7006504", GenotypeType.HET))) shouldBe true - hasGenotype(record, List(("Mother_7006508", GenotypeType.HET), ("Child_7006504", GenotypeType.HOM_REF))) shouldBe false + hasGenotype(record, List(("Sample_103", GenotypeType.HOM_REF), ("Sample_101", GenotypeType.HET))) shouldBe true + hasGenotype(record, List(("Sample_103", GenotypeType.HET), ("Sample_101", GenotypeType.HOM_REF))) shouldBe false } @Test def testMinQualScore() = { @@ -142,42 +148,42 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - mustHaveVariant(record, List("Child_7006504")) shouldBe true - mustHaveVariant(record, List("Child_7006504", "Father_7006506")) shouldBe true - mustHaveVariant(record, List("Child_7006504", "Father_7006506", "Mother_7006508")) shouldBe false + mustHaveVariant(record, List("Sample_101")) shouldBe true + mustHaveVariant(record, List("Sample_101", "Sample_102")) shouldBe true + mustHaveVariant(record, List("Sample_101", "Sample_102", "Sample_103")) shouldBe false } @Test def testSameGenotype() = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - notSameGenotype(record, "Child_7006504", "Father_7006506") shouldBe false - notSameGenotype(record, "Child_7006504", "Mother_7006508") shouldBe true - notSameGenotype(record, "Father_7006506", "Mother_7006508") shouldBe true + notSameGenotype(record, "Sample_101", "Sample_102") shouldBe false + notSameGenotype(record, "Sample_101", "Sample_103") shouldBe true + notSameGenotype(record, "Sample_102", "Sample_103") shouldBe true } @Test def testfilterHetVarToHomVar() = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - filterHetVarToHomVar(record, "Child_7006504", "Father_7006506") shouldBe true - filterHetVarToHomVar(record, "Child_7006504", "Mother_7006508") shouldBe true - filterHetVarToHomVar(record, "Father_7006506", "Mother_7006508") shouldBe true + filterHetVarToHomVar(record, "Sample_101", "Sample_102") shouldBe true + filterHetVarToHomVar(record, "Sample_101", "Sample_103") shouldBe true + filterHetVarToHomVar(record, "Sample_102", "Sample_103") shouldBe true } @Test def testDeNovo() = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - denovoInSample(record, "Child_7006504") shouldBe false - denovoInSample(record, "Father_7006506") shouldBe false - denovoInSample(record, "Mother_7006508") shouldBe false + denovoInSample(record, "Sample_101") shouldBe false + denovoInSample(record, "Sample_102") shouldBe false + denovoInSample(record, "Sample_103") shouldBe false } @Test def testResToDom() = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - val trio = new Trio("Child_7006504", "Father_7006506", "Mother_7006508") + val trio = new Trio("Sample_101", "Sample_102", "Sample_103") resToDom(record, List(trio)) shouldBe false } @@ -185,7 +191,7 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { @Test def testTrioCompound = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - val trio = new Trio("Child_7006504", "Father_7006506", "Mother_7006508") + val trio = new Trio("Sample_101", "Sample_102", "Sample_103") trioCompound(record, List(trio)) } @@ -193,7 +199,7 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { @Test def testDeNovoTrio = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - val trio = new Trio("Child_7006504", "Father_7006506", "Mother_7006508") + val trio = new Trio("Sample_101", "Sample_102", "Sample_103") denovoTrio(record, List(trio)) } diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala index 106562a278735363780797734e4aaea25a7143d8..ee327392cf6f073088a5bea9b2d449593b7e3fd3 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfToTsvTest.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet.tools +import java.io.File import java.nio.file.Paths import org.scalatest.Matchers @@ -41,19 +42,25 @@ class VcfToTsvTest extends TestNGSuite with MockitoSugar with Matchers { val unvepped = resourcePath("/unvepped.vcf") @Test def testAllFields() = { - val tmp_path = "/tmp/VcfToTsv_" + rand.nextString(10) + ".tsv" + val tmp = File.createTempFile("VcfToTsv", ".tsv") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", unvepped, "-o", tmp_path, "--all_info") main(arguments) } @Test def testSpecificField() = { - val tmp_path = "/tmp/VcfToTsv_" + rand.nextString(10) + ".tsv" + val tmp = File.createTempFile("VcfToTsv", ".tsv") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vepped, "-o", tmp_path, "-i", "CSQ") main(arguments) } @Test def testNewSeparators() = { - val tmp_path = "/tmp/VcfToTsv_" + rand.nextString(10) + ".tsv" + val tmp = File.createTempFile("VcfToTsv", ".tsv") + tmp.deleteOnExit() + val tmp_path = tmp.getAbsolutePath val arguments = Array("-I", vepped, "-o", tmp_path, "--all_info", "--separator", ",", "--list_separator", "|") main(arguments) } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala index 8871f481f3075d7eef52784583ccb5aadaa5b715..f2f11a03e775ebe5a7fa4161813a0157c8f7976a 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala @@ -48,6 +48,7 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi disableBaq = true depth = Option(1000000) outputMappingQuality = true + } private def fixMpileup = new PythonCommandLineFunction { @@ -96,7 +97,6 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi def cmdLine: String = { // FIXME: manual trigger of commandLine for version retrieval mpileup.commandLine - mpileup.cmdPipe + " | " + fixMpileup.commandLine + " | " + removeEmptyPile().commandLine + " | " + - varscan.commandLine + " && " + compress.commandLine + " && " + index.commandLine + (mpileup | fixMpileup | removeEmptyPile() | varscan).commandLine + " && " + compress.commandLine + " && " + index.commandLine } } diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index dafb2e1ccc5a72807aa37cc6858711afbfe78c23..5e80f114c82bd16fb1efd7788158f31d1c2c0f05 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -144,8 +144,8 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { config("bam_to_fastq", default = false).asBoolean match { case true => val samToFastq = SamToFastq(qscript, config("bam"), - new File(libDir, sampleId + "-" + libId + ".R1.fastq"), - new File(libDir, sampleId + "-" + libId + ".R2.fastq")) + new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"), + new File(libDir, sampleId + "-" + libId + ".R2.fq.gz")) samToFastq.isIntermediate = true qscript.add(samToFastq) mapping.foreach(mapping => { @@ -198,8 +198,8 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { }) variantcalling.foreach(vc => { - vc.sampleId = Some(libId) - vc.libId = Some(sampleId) + vc.sampleId = Some(sampleId) + vc.libId = Some(libId) vc.outputDir = new File(libDir, "variantcalling") if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil else vc.inputBams = bamFile.get :: Nil diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala index 26302af07fb2abac8007faaf9dbc55e16fca23e1..6eb2832c17acea6bcdc0d6766d48d3100dc73472 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala @@ -19,14 +19,13 @@ import java.io.File import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsCall -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } +import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeConcordance, CombineVariants } import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } -import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix } -import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging } -import org.broadinstitute.gatk.queue.function.CommandLineFunction -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } +import nl.lumc.sasc.biopet.extensions.{ Ln, Bgzip, Tabix } +import nl.lumc.sasc.biopet.utils.Logging +import org.broadinstitute.gatk.utils.commandline.Input /** * Common trait for ShivaVariantcalling @@ -39,6 +38,10 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) var inputBams: List[File] = Nil + var referenceVcf: Option[File] = config("reference_vcf") + + var referenceVcfRegions: Option[File] = config("reference_vcf_regions") + /** Name prefix, can override this methods if neeeded */ def namePrefix: String = { (sampleId, libId) match { @@ -48,6 +51,8 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with } } + override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ"))) + /** Executed before script */ def init(): Unit = { } @@ -84,6 +89,16 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with vcfStats.setOutputDir(new File(caller.outputDir, "vcfstats")) add(vcfStats) addSummarizable(vcfStats, namePrefix + "-vcfstats-" + caller.name) + + referenceVcf.foreach(referenceVcfFile => { + val gc = new GenotypeConcordance(this) + gc.evalFile = caller.outputFile + gc.compFile = referenceVcfFile + gc.outputFile = new File(caller.outputDir, s"$namePrefix-genotype_concordance.${caller.name}.txt") + referenceVcfRegions.foreach(gc.intervals ::= _) + add(gc) + addSummarizable(gc, s"$namePrefix-genotype_concordance-${caller.name}") + }) } add(cv) @@ -94,11 +109,21 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with add(vcfStats) addSummarizable(vcfStats, namePrefix + "-vcfstats-final") + referenceVcf.foreach(referenceVcfFile => { + val gc = new GenotypeConcordance(this) + gc.evalFile = finalFile + gc.compFile = referenceVcfFile + gc.outputFile = new File(outputDir, s"$namePrefix-genotype_concordance.final.txt") + referenceVcfRegions.foreach(gc.intervals ::= _) + add(gc) + addSummarizable(gc, s"$namePrefix-genotype_concordance-final") + }) + addSummaryJobs() } /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools) + protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools, new BcftoolsSingleSample) /** General trait for a variantcaller mode */ trait Variantcaller { @@ -161,22 +186,52 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with val mp = new SamtoolsMpileup(qscript) mp.input = inputBams mp.u = true + mp.reference = referenceFasta() val bt = new BcftoolsCall(qscript) - bt.O = "z" + bt.O = Some("z") bt.v = true bt.c = true - //TODO: add proper class with piping support, see also issue #114 - add(new CommandLineFunction { - @Input - var input = inputBams + add(mp | bt > outputFile) + add(Tabix(qscript, outputFile)) + } + } + + /** default mode of bcftools */ + class BcftoolsSingleSample extends Variantcaller { + val name = "bcftools_singlesample" + protected val defaultPrio = 8 - @Output - var output = outputFile + /** Final output file of this mode */ + def outputFile = new File(outputDir, namePrefix + ".bcftools_singlesample.vcf.gz") - def commandLine: String = mp.cmdPipe + " | " + bt.cmdPipeInput + " > " + outputFile + " && tabix -p vcf " + outputFile - }) + def addJobs() { + val sampleVcfs = for (inputBam <- inputBams) yield { + val mp = new SamtoolsMpileup(qscript) + mp.input :+= inputBam + mp.u = true + mp.reference = referenceFasta() + + val bt = new BcftoolsCall(qscript) + bt.O = Some("z") + bt.v = true + bt.c = true + bt.output = new File(outputDir, inputBam.getName + ".vcf.gz") + + add(mp | bt) + add(Tabix(qscript, bt.output)) + bt.output + } + + if (sampleVcfs.size > 1) { + val bcfmerge = new BcftoolsMerge(qscript) + bcfmerge.input = sampleVcfs + bcfmerge.output = outputFile + bcfmerge.O = Some("z") + add(bcfmerge) + } else add(Ln.apply(qscript, sampleVcfs.head, outputFile)) + add(Tabix(qscript, outputFile)) } } @@ -192,10 +247,16 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with def addJobs() { val rawFiles = inputBams.map(bamFile => { + val mp = new SamtoolsMpileup(qscript) { + override def configName = "samtoolsmpileup" + override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)) + } + mp.input :+= bamFile + val m2v = new MpileupToVcf(qscript) m2v.inputBam = bamFile m2v.output = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.vcf") - add(m2v) + add(mp | m2v) val vcfFilter = new VcfFilter(qscript) { override def configName = "vcffilter" diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index dae0b974329950501369990e3a53bae077f8f071..54f5a455364dcd09878667d3a52ca139222190b6 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -49,21 +49,32 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { @DataProvider(name = "shivaVariantcallingOptions") def shivaVariantcallingOptions = { val bool = Array(true, false) - (for (bams <- 0 to 3; raw <- bool; bcftools <- bool; freebayes <- bool) yield Array(bams, raw, bcftools, freebayes)).toArray + (for ( + bams <- 0 to 3; + raw <- bool; + bcftools <- bool; + bcftools_singlesample <- bool; + freebayes <- bool + ) yield Array(bams, raw, bcftools, bcftools_singlesample, freebayes)).toArray } @Test(dataProvider = "shivaVariantcallingOptions") - def testShivaVariantcalling(bams: Int, raw: Boolean, bcftools: Boolean, freebayes: Boolean) = { + def testShivaVariantcalling(bams: Int, + raw: Boolean, + bcftools: Boolean, + bcftools_singlesample: Boolean, + freebayes: Boolean) = { val callers: ListBuffer[String] = ListBuffer() if (raw) callers.append("raw") if (bcftools) callers.append("bcftools") + if (bcftools_singlesample) callers.append("bcftools_singlesample") if (freebayes) callers.append("freebayes") val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList - val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !freebayes) + val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftools_singlesample && !freebayes) if (illegalArgumentException) intercept[IllegalArgumentException] { pipeline.script() @@ -76,7 +87,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0) //FIXME: Can not check for bcftools because of piping pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) - pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) + //pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0) pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0) } }