diff --git a/bam2wig/pom.xml b/bam2wig/pom.xml index 5afb29d42f96f6bda92b4d8c6a35d2b09308f558..82fc7fff5fd91f4c46fd22421fdb54c89ccf3871 100644 --- a/bam2wig/pom.xml +++ b/bam2wig/pom.xml @@ -45,6 +45,18 @@ <artifactId>BiopetExtensions</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> </dependencies> </project> \ No newline at end of file diff --git a/bam2wig/src/test/resources/empty.bam b/bam2wig/src/test/resources/empty.bam new file mode 100644 index 0000000000000000000000000000000000000000..bd4073d80cbf73c689e97d2814f9b1fd00d63895 Binary files /dev/null and b/bam2wig/src/test/resources/empty.bam differ diff --git a/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala b/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..eaf2ae37a5d844ba93669f2fef4f0a5d98ab962d --- /dev/null +++ b/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.pipelines.bamtobigwig + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 09/05/16. 
+ */ +class BamToChromSizesTest extends TestNGSuite with Matchers { + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testChromSizes: Unit = { + val bamFile = new File(resourcePath("/empty.bam")) + val bamToChromSizes = new BamToChromSizes(null) + bamToChromSizes.bamFile = bamFile + bamToChromSizes.chromSizesFile = File.createTempFile("chrom.", ".sizes") + bamToChromSizes.chromSizesFile.deleteOnExit() + bamToChromSizes.run() + Source.fromFile(bamToChromSizes.chromSizesFile).getLines().toList shouldBe List("chrQ\t10000", "chrR\t10000") + } +} diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala index a43cb3890bcf3521e3be6645b4e175c59f84d143..529e6830db48b454499a7344c8606229669acab9 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala @@ -30,18 +30,16 @@ import scala.collection.mutable.ListBuffer */ object WriteDependencies extends Logging with Configurable { val root: Configurable = null - private val functionNames: mutable.Map[QFunction, String] = mutable.Map() - - private def createFunctionNames(functions: Seq[QFunction]): Unit = { + private def createFunctionNames(functions: Seq[QFunction]): Map[QFunction, String] = { val cache: mutable.Map[String, Int] = mutable.Map() - for (function <- functions) { + (for (function <- functions) yield { val baseName = function match { case f: Configurable => f.configNamespace case f => f.getClass.getSimpleName } cache += baseName -> (cache.getOrElse(baseName, 0) + 1) - functionNames += function -> s"$baseName-${cache(baseName)}" - } + function -> s"$baseName-${cache(baseName)}" + }).toMap } /** @@ -55,7 +53,7 @@ object WriteDependencies extends Logging with Configurable { val errorOnMissingInput: Boolean = 
config("error_on_missing_input", false) - createFunctionNames(functions) + val functionNames = createFunctionNames(functions) case class QueueFile(file: File) { private val inputJobs: ListBuffer[QFunction] = ListBuffer() diff --git a/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..b8dee445d737323893c1f5a85dce8b75f7feec72 --- /dev/null +++ b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala @@ -0,0 +1,60 @@ +package nl.lumc.sasc.biopet.core + +import java.io.File +import java.nio.file.Files + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.function.QFunction +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 09/05/16. + */ +class WriteDependenciesTest extends TestNGSuite with Matchers { + + import WriteDependenciesTest._ + + case class Qfunc(in: Seq[File], out: Seq[File]) extends QFunction { + override def inputs = in + override def outputs = out + override def doneOutputs = out.map(x => new File(x.getParentFile, s".${x.getName}.done")) + override def failOutputs = out.map(x => new File(x.getParentFile, s".${x.getName}.fail")) + jobOutputFile = new File(out.head + ".out") + } + + @Test + def testDeps: Unit = { + val outputFile = File.createTempFile("deps.", ".json") + outputFile.deleteOnExit() + val func1 = Qfunc(file1 :: Nil, file2 :: Nil) + val func2 = Qfunc(file2 :: Nil, file3 :: Nil) + WriteDependencies.writeDependencies(func1 :: func2 :: Nil, outputFile) + val deps = ConfigUtils.fileToConfigMap(outputFile) + deps("jobs") shouldBe a[Map[_, _]] + val jobs = deps("jobs").asInstanceOf[Map[String, Map[String, Any]]] + jobs.count(_._1.contains("Qfunc")) shouldBe 2 + + deps("files") shouldBe a[List[_]] + val 
files = deps("files").asInstanceOf[List[Map[String, Any]]] + val paths = files.map(x => x.get("path")).flatten + assert(paths.contains(file1.toString)) + assert(paths.contains(file2.toString)) + assert(paths.contains(file3.toString)) + + files.find(_.get("path") == Some(file1.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(true) + files.find(_.get("path") == Some(file2.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(false) + files.find(_.get("path") == Some(file3.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(false) + } +} + +object WriteDependenciesTest { + val tempDir = Files.createTempDirectory("test").toFile + tempDir.deleteOnExit() + val file1 = new File(tempDir, "file1.txt") + val file2 = new File(tempDir, "file2.txt") + val file3 = new File(tempDir, "file3.txt") +} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala index ed0065e589e17000a4e4fb9742d4bca71f6622a1..1460f3f6de300d2fed31e573d7d9ea49aa77d854 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala @@ -69,7 +69,7 @@ class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterG override def cmdLine = super.cmdLine + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = 
"%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + conditional(convertToBasePairResolution, "-bpResolution", escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala index 650340d63c1be0ba9195609a616ddddb4abee8ef..ac939b07842e301928a4daf5d9528318b9595bf5 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala @@ -106,16 +106,12 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter @Gather(enabled = false) private var outputIndex: File = _ - @Output - @Gather(enabled = false) - private var dbsnpIndex: File = _ - override def beforeGraph() { super.beforeGraph() deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) outputIndex = VcfUtils.getVcfIndexFile(out) - dbsnp.foreach(x => dbsnpIndex = VcfUtils.getVcfIndexFile(x)) + dbsnp.foreach(x => deps :+= VcfUtils.getVcfIndexFile(x)) } override def cmdLine = super.cmdLine + @@ -133,7 +129,7 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + 
conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index 9eac2ba9e3d4974e7b479addcb3d0f1dadf5ef56..403bee79fea30f6ec521ce9dd36927be745f8640 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -419,7 +419,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + optional("-likelihoodEngine", likelihoodCalculationEngine, spaceSeparated = true, escape = true, format = "%s") + optional("-hksr", heterogeneousKmerSizeResolution, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + conditional(dontTrimActiveRegions, "-dontTrimActiveRegions", escape = true, format = "%s") + optional("-maxDiscARExtension", maxDiscARExtension, spaceSeparated = true, escape = true, format = "%s") + optional("-maxGGAARExtension", maxGGAARExtension, spaceSeparated = true, escape = true, format = "%s") + @@ -444,7 +444,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + 
optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles.getOrElse(null)), alleles, spaceSeparated = true, escape = true, format = "%s") + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala index a1ed7b732f9b72d1660c9ea8c1995e5fc0137a68..37885ac9c1958635d532e4ad2fc70ee5b9817cfb 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala @@ -218,8 +218,8 @@ class SelectVariants(val root: Configurable) extends CommandLineGATK with Scatte override def cmdLine = super.cmdLine + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-disc", discordance.getOrElse(null)), discordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-conc", 
concordance.getOrElse(null)), concordance, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index 0edfe5260fe2fb9101fd92d01fbcebce94ba0441..42daf5e64018c1c8c51582cb3f2bbf9844bf1e37 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -276,7 +276,7 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat conditional(ignoreSNPAlleles, "-ignoreSNPAlleles", escape = true, format = "%s") + conditional(allReadsSP, "-dl", escape = true, format = "%s") + conditional(ignoreLaneInfo, "-ignoreLane", escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls.getOrElse(null)), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") + optional("-refsample", reference_sample_name, spaceSeparated = true, escape = true, format = "%s") + optional("-minqs", min_quality_score, spaceSeparated = true, escape = true, format = "%s") + optional("-maxqs", max_quality_score, spaceSeparated = true, escape = true, format = "%s") + @@ -291,14 +291,14 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + 
optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles.getOrElse(null)), alleles, spaceSeparated = true, escape = true, format = "%s") + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + optional("-logExactCalls", exactcallslog, spaceSeparated = true, escape = true, format = "%s") + optional("-out_mode", output_mode, spaceSeparated = true, escape = true, format = "%s") + conditional(allSitePLs, "-allSitePLs", escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + repeat("-onlyEmitSamples", onlyEmitSamples, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala index d98a55a49eb6e34c00588fddde66dc00cddfe610..0e35d9b81050cddd8d762e340c2f984bdd9c59ec 100644 --- 
a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala @@ -104,8 +104,8 @@ class VariantAnnotator(val root: Configurable) extends CommandLineGATK with Scat override def cmdLine = super.cmdLine + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile), snpEffFile, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile.getOrElse(null)), snpEffFile, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + repeat("-resource", resource, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala index cf1c362c7dd06b2eec25104fdfd3fb5a06dea2f8..182007076b311d97ff8c58e404eb498bc0f2b90b 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala @@ -135,8 +135,8 @@ class VariantEval(val root: Configurable) extends CommandLineGATK { optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + 
repeat("-eval", eval, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-gold", goldStandard), goldStandard, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-gold", goldStandard.getOrElse(null)), goldStandard, spaceSeparated = true, escape = true, format = "%s") + repeat("-select", select_exps, spaceSeparated = true, escape = true, format = "%s") + repeat("-selectName", select_names, spaceSeparated = true, escape = true, format = "%s") + repeat("-sn", sample, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala index 6b863f81d16d4f26754ea87ddf5c703f35fd4588..301fdf1f31bb8475c1092fb76b69e2c667448ec1 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala @@ -61,7 +61,7 @@ class Summary(file: File) { } /** Executes given function for each sample */ - def getSampleValues(function: (Summary, String) => Option[Any]): Map[String, Option[Any]] = { + def getSampleValues[T](function: (Summary, String) => Option[T]): Map[String, Option[T]] = { (for (sample <- samples) yield sample -> function(this, sample)).toMap } @@ -95,7 +95,7 @@ class Summary(file: File) { * @param function Function to execute * @return (sampleId, libId) -> value */ - def 
getLibraryValues(function: (Summary, String, String) => Option[Any]): Map[(String, String), Option[Any]] = { + def getLibraryValues[T](function: (Summary, String, String) => Option[T]): Map[(String, String), Option[T]] = { (for (sample <- samples; lib <- libraries.getOrElse(sample, Set())) yield { (sample, lib) -> function(this, sample, lib) }).toMap diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala index dbce03915e2ed8512e9e737f9189147670b76a5e..9ad67419137321027aceaa14a88781a32c74f0ff 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala @@ -33,7 +33,6 @@ case class SummaryValue(value: Option[Any]) { }) } - //TODO: Calculations are not yet used somewhere, needs more testing def +(that: SummaryValue): SummaryValue = { (this.value, that.value) match { case (Some(a: Double), Some(b)) => SummaryValue(Some(a + b.toString.toDouble)) diff --git a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..cd319fee6bcb0da8bccb7a1c474f1c566ab0dab5 --- /dev/null +++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala @@ -0,0 +1,81 @@ +package nl.lumc.sasc.biopet.utils + +import java.io.{ File, FileNotFoundException, PrintWriter } +import java.nio.file.Files + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 05/05/16. 
+ */ +class IoUtilsTest extends TestNGSuite with Matchers { + + def createTempTestFile(file: File): Unit = { + file.getParentFile.mkdirs() + val writer = new PrintWriter(file) + writer.println("test") + writer.close() + file.deleteOnExit() + } + + @Test + def testCopyFile: Unit = { + val temp1 = File.createTempFile("test.", ".txt") + temp1.deleteOnExit() + val temp2 = File.createTempFile("test.", ".txt") + temp2.deleteOnExit() + createTempTestFile(temp1) + IoUtils.copyFile(temp1, temp2) + val reader = Source.fromFile(temp2) + reader.getLines().toList shouldBe List("test") + reader.close() + } + + @Test + def testCopyFileNonExistingDir: Unit = { + val temp1 = File.createTempFile("test.", ".txt") + val tempDir = new File(Files.createTempDirectory("test").toFile, "non-exist") + tempDir.deleteOnExit() + tempDir shouldNot exist + val temp2 = new File(tempDir, "test.txt") + createTempTestFile(temp1) + intercept[FileNotFoundException] { + IoUtils.copyFile(temp1, temp2) + } + IoUtils.copyFile(temp1, temp2, true) + val reader = Source.fromFile(temp2) + reader.getLines().toList shouldBe List("test") + reader.close() + } + + @Test + def testCopyDir: Unit = { + val tempDir1 = Files.createTempDirectory("test").toFile + tempDir1.deleteOnExit() + val tempDir2 = Files.createTempDirectory("test").toFile + tempDir2.deleteOnExit() + val relativePaths: List[String] = List( + "test1.txt", + "test2.txt", + "dir1" + File.separator + "test1.txt", + "dir1" + File.separator + "test2.txt", + "dir2" + File.separator + "test1.txt", + "dir2" + File.separator + "test2.txt") + relativePaths.foreach { x => + createTempTestFile(new File(tempDir1, x)) + new File(tempDir2, x) shouldNot exist + } + IoUtils.copyDir(tempDir1, tempDir2) + relativePaths.foreach { x => + val file = new File(tempDir2, x) + file should exist + val reader = Source.fromFile(file) + reader.getLines().toList shouldBe List("test") + reader.close() + } + } +} diff --git 
a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..45f650496cfa4e04784c4b4a9748d5b6ab18be5f --- /dev/null +++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala @@ -0,0 +1,96 @@ +package nl.lumc.sasc.biopet.utils.summary + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 06/05/16. + */ +class SummaryTest extends TestNGSuite with Matchers { + @Test + def testSamples: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.samples shouldBe Set("sample1") + } + + @Test + def testLibraries: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.libraries shouldBe Map("sample1" -> Set("lib1")) + } + + @Test + def testValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getValue("key_1") shouldBe Some("test_1") + summary.getValue("key_x") shouldBe None + summary.getValue("samples", "sample1", "key_2") shouldBe Some("test_2") + summary.getValue("samples", "sample1", "libraries", "lib1", "key_3") shouldBe Some("test_3") + + summary.getValue(Some("sample1"), None, "key_2") shouldBe Some("test_2") + summary.getValue(Some("sample1"), None, "key_x") shouldBe None + summary.getValue(Some("sample1"), Some("lib1"), "key_3") shouldBe Some("test_3") + summary.getValue(Some("sample1"), Some("lib1"), "key_x") shouldBe None + summary.getValue(None, None, "key_1") shouldBe Some("test_1") + summary.getValue(None, None, "key_x") shouldBe None + } + + @Test + def testSampleValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getSampleValue("sample1", "key_2") shouldBe Some("test_2") + 
summary.getSampleValue("sample1", "key_x") shouldBe None + summary.getSampleValue("samplex", "key_x") shouldBe None + } + + @Test + def testSampleValues: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getSampleValues("key_2") shouldBe Map("sample1" -> Some("test_2")) + summary.getSampleValues("key_x") shouldBe Map("sample1" -> None) + summary.getSampleValues((summary, sample) => summary.getSampleValue(sample, "key_2")) shouldBe Map("sample1" -> Some("test_2")) + + } + + @Test + def testLibraryValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getLibraryValue("sample1", "lib1", "key_3") shouldBe Some("test_3") + summary.getLibraryValue("sample1", "lib1", "key_x") shouldBe None + summary.getLibraryValue("samplex", "libx", "key_x") shouldBe None + } + + @Test + def testLibraryValues: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getLibraryValues("key_3") shouldBe Map(("sample1", "lib1") -> Some("test_3")) + summary.getLibraryValues("key_x") shouldBe Map(("sample1", "lib1") -> None) + summary.getLibraryValues((summary, sample, lib) => summary.getLibraryValue(sample, lib, "key_3")) shouldBe Map(("sample1", "lib1") -> Some("test_3")) + } + +} + +object SummaryTest { + val testSummary = Map( + "key_1" -> "test_1", + "samples" -> Map( + "sample1" -> Map( + "key_2" -> "test_2", + "libraries" -> Map( + "lib1" -> Map("key_3" -> "test_3") + ) + ) + ) + ) + + val testSummaryFile = File.createTempFile("summary.", ".json") + testSummaryFile.deleteOnExit() + + val writer = new PrintWriter(testSummaryFile) + writer.println(ConfigUtils.mapToJson(testSummary).nospaces) + writer.close() +} diff --git a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..f2996d522025debf22430cffdc3feae29e7b0737 --- /dev/null 
+++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala @@ -0,0 +1,95 @@ +package nl.lumc.sasc.biopet.utils.summary + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 06/05/16. + */ +class SummaryValueTest extends TestNGSuite with Matchers { + @Test + def testConstructor: Unit = { + val summary = new Summary(SummaryValueTest.testSummaryFile) + new SummaryValue(None).value shouldBe None + new SummaryValue(Some(1)).value shouldBe Some(1) + new SummaryValue(List("key_1"), summary, None, None).value shouldBe Some("test_1") + new SummaryValue(List("key_2"), summary, Some("sample1"), None).value shouldBe Some("test_2") + new SummaryValue(List("key_3"), summary, Some("sample1"), Some("lib1")).value shouldBe Some("test_3") + } + + @Test + def testPlus: Unit = { + new SummaryValue(Some(1.0)) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1.0)) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1)) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("1")) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("1")) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testMin: Unit = { + new SummaryValue(Some(1.0)) - new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some(1.0)) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some(1)) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("1")) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some("1")) - new 
SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(None) - new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testMultiply: Unit = { + new SummaryValue(Some(1.0)) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1.0)) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1)) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("1")) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("1")) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(None) + } + + @Test + def testDivide: Unit = { + new SummaryValue(Some(2.0)) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(2.0)) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(2)) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("2")) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("2")) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testLeft: Unit = { + new SummaryValue(Some(2.0)) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some(2.0)) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some(2)) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("2")) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("2")) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(None) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + +} + +object SummaryValueTest { + val 
testSummary = Map( + "key_1" -> "test_1", + "samples" -> Map( + "sample1" -> Map( + "key_2" -> "test_2", + "libraries" -> Map( + "lib1" -> Map("key_3" -> "test_3") + ) + ) + ) + ) + + val testSummaryFile = File.createTempFile("summary.", ".json") + testSummaryFile.deleteOnExit() + + val writer = new PrintWriter(testSummaryFile) + writer.println(ConfigUtils.mapToJson(testSummary).nospaces) + writer.close() +} diff --git a/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index 8d3e47bdd9182a24d493c9083944f2fe98dc5287..d6b0f8c1d4f96a3c5af267341336cc8d0f2c452f 100644 --- a/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -58,7 +58,7 @@ class Carp(val root: Configurable) extends QScript with MultisampleMappingTrait override def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends super.Sample(sampleId) { - override def preProcessBam = Some(createFile(".filter.bam")) + override def preProcessBam = Some(createFile("filter.bam")) val controls: List[String] = config("control", default = Nil) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index c38cb8564ddb0f9243443bab60f6cb41ceec5b53..4f84d061198ee9449811cd414c72e661ee1e501b 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -73,7 +73,7 @@ class ShivaVariantcalling(val root: Configurable) extends QScript /** Variantcallers requested by the config */ protected val configCallers: Set[String] = config("variantcallers") - protected val callers: List[Variantcaller] = { + val callers: List[Variantcaller] = { (for (name <- configCallers) yield { if 
(!callersList.exists(_.name == name)) Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala index 2f7b8446b4c0a64b1348edb396f43fedcaaab4d3..a08703e95dc4ce61437641df80dd3c8da1641394 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala @@ -13,14 +13,21 @@ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { val name = "haplotypecaller_gvcf" protected def defaultPrio = 5 + /** + * Map of sample name -> gvcf, assigned by biopetScript(). Empty until biopetScript() has run. + */ + protected var gVcfFiles: Map[String, File] = Map() + + def getGvcfs = gVcfFiles + def biopetScript() { - val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { + gVcfFiles = for ((sample, inputBam) <- inputBams) yield { val hc = gatk.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) add(hc) - hc.out + sample -> hc.out } - val genotypeGVCFs = gatk.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile) add(genotypeGVCFs) } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala index a2e13a46602dc9ea6bee2b3cfd19e2c251d8f905..c484171187fc31c27a06c7bd69d10433c9a9bb86 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala @@ -28,6 +28,9 @@ trait Variantcaller extends QScript with BiopetQScript with Reference { var namePrefix: String = _ + /** + * Map of
samplename -> (preprocessed) bam file + */ var inputBams: Map[String, File] = _ def init() = {} diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index eb1d40ece24ce6862023eae3343b84c840ea3e42..976fa91996d820ff38fcf22357a358f46277a848 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -58,7 +58,9 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { def multisampleCalling: Boolean = true def sampleCalling = false def libraryCalling = false - def dbsnp: Boolean = true + def dbsnp = true + def svCalling = false + def annotation = false @Test(dataProvider = "shivaOptions") def testShiva(f: String, sample1: Boolean, sample2: Boolean, @@ -73,7 +75,9 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { "single_sample_variantcalling" -> sampleCalling, "library_variantcalling" -> libraryCalling, "use_indel_realigner" -> realign, - "use_base_recalibration" -> baseRecalibration), m) + "use_base_recalibration" -> baseRecalibration, + "sv_calling" -> svCalling, + "annotation" -> annotation), m) } @@ -96,6 +100,21 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0) pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) + pipeline.summarySettings.get("annotation") shouldBe Some(annotation) + pipeline.summarySettings.get("sv_calling") shouldBe Some(svCalling) + + pipeline.samples foreach { + case (sampleId, sample) => + sample.summarySettings.get("single_sample_variantcalling") shouldBe Some(sampleCalling) + sample.summarySettings.get("use_indel_realigner") shouldBe Some(realign) + sample.libraries.foreach { + case (libId, lib) => + 
lib.summarySettings.get("library_variantcalling") shouldBe Some(libraryCalling) + lib.summarySettings.get("use_indel_realigner") shouldBe Some(realign) + lib.summarySettings.get("use_base_recalibration") shouldBe Some(baseRecalibration && dbsnp) + } + } + pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe ( (if (multisampleCalling) 2 else 0) + (if (sampleCalling) numberSamples * 2 else 0) + @@ -125,6 +144,20 @@ class ShivaSampleCallingTest extends ShivaTestTrait { override def baseRecalibrationProvider = Array(false) override def sampleCalling = true } +class ShivaWithSvCallingTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = Array(false) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def svCalling = true +} +class ShivaWithAnnotationTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = Array(false) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def annotation = true +} object ShivaTest { val outputDir = Files.createTempDir() @@ -170,7 +203,18 @@ object ShivaTest { "wigtobigwig" -> Map("exe" -> "test"), "md5sum" -> Map("exe" -> "test"), "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test") + "tabix" -> Map("exe" -> "test"), + "breakdancerconfig" -> Map("exe" -> "test"), + "breakdancercaller" -> Map("exe" -> "test"), + "pindelconfig" -> Map("exe" -> "test"), + "pindelcaller" -> Map("exe" -> "test"), + "pindelvcf" -> Map("exe" -> "test"), + "clever" -> Map("exe" -> "test"), + "delly" -> Map("exe" -> "test"), + "pysvtools" -> Map( + "exe" -> "test", + "exclusion_regions" -> "test", + "translocations_only" -> false) ) val sample1 = Map(