From 1d463d8278d6dac62be613141227ae24c428800b Mon Sep 17 00:00:00 2001 From: Wai Yi Leung <w.y.leung@lumc.nl> Date: Wed, 9 Mar 2016 15:23:06 +0100 Subject: [PATCH] Change executable from pysvtools for SV merging. Add all project VCF file merge into 1 global merge file --- .../sasc/biopet/extensions/Pysvtools.scala | 2 +- .../nl/lumc/sasc/biopet/tools/SeqStat.scala | 10 +++++----- .../lumc/sasc/biopet/utils/BamUtilsTest.scala | 4 ++-- .../pipelines/shiva/ShivaSvCalling.scala | 19 ++++++++++++++++--- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala index 59f8f1ef4..ebc09a328 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala @@ -28,7 +28,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction { var bedoutput: File = _ var regionsoutput: File = _ - executable = config("exe", default = "mergevcf") + executable = config("exe", default = "vcf_merge_sv_events") def versionRegex = """PySVtools (.*)""".r def versionCommand = executable + " --version" diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala index 69b8e7a8a..fcfcf9e25 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala @@ -89,10 +89,10 @@ object SeqStat extends ToolCommand { (qual_low_boundery < 59, qual_high_boundery > 74) match { case (false, true) => phredEncoding = Solexa - // TODO: check this later on - // complex case, we cannot tell wheter this is a sanger or solexa - // but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa - // New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K) + // TODO: check this later on + // complex case, we cannot tell wheter this is a sanger or solexa + // but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa + // New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K) case (true, true) => phredEncoding = Solexa // this is definite a sanger sequence, the lower end is sanger only case (true, false) => phredEncoding = Sanger @@ -181,7 +181,7 @@ object SeqStat extends ToolCommand { quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0) } if (nucs.length <= baseStats(pos).nucs.length) { - nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0) + nucs ++= mutable.ArrayBuffer.fill(baseStats(pos).nucs.length - nucs.length)(0) } // count into the quals baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value } diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala index 355d0d8df..75021402f 100644 --- a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala +++ b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala @@ -3,11 +3,11 @@ package nl.lumc.sasc.biopet.utils import java.io.File import htsjdk.samtools._ -import org.mockito.Mockito.{inOrder => inOrd} +import org.mockito.Mockito.{ inOrder => inOrd } import org.scalatest.Matchers import org.scalatest.mock.MockitoSugar import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{BeforeClass, Test} +import org.testng.annotations.{ BeforeClass, Test } /** * Created by wyleung on 22-2-16. diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala index 6c7652170..4407a9091 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -33,6 +33,9 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript def this() = this(null) + var outputMergedVCFbySample: Map[String, File] = Map() + var outputMergedVCF: File = _ + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) protected[shiva] var inputBamsArg: List[File] = Nil @@ -41,6 +44,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript /** Executed before script */ def init(): Unit = { if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + outputMergedVCF = new File(outputDir, "allsamples.merged.vcf") } /** Variantcallers requested by the config */ @@ -56,7 +60,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript val callers = callersList.filter(x => configCallers.contains(x.name)) require(inputBams.nonEmpty, "No input bams found") - require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", ")) + require(callers.nonEmpty, "Please select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", ")) callers.foreach { caller => caller.inputBams = inputBams @@ -74,9 +78,15 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript mergeSVcalls.input = sampleVCFS.flatten mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf") add(mergeSVcalls) - // outputFiles += (sample -> mergeSVcalls.output) + outputMergedVCFbySample += (sample -> mergeSVcalls.output) } + // merge all files from all samples in project + val mergeSVcallsProject = new Pysvtools(this) + mergeSVcallsProject.input = outputMergedVCFbySample.values.toList + mergeSVcallsProject.output = outputMergedVCF + add(mergeSVcallsProject) + // merging the VCF calls by project // basicly this will do all samples from this pipeline run // group by "tags" @@ -98,7 +108,10 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript def summaryFiles: Map[String, File] = { val callers: Set[String] = configCallers //callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) - Map() + Map( + "final_mergedvcf" -> outputMergedVCF + + ) } } -- GitLab