diff --git a/bam2wig/pom.xml b/bam2wig/pom.xml index 5afb29d42f96f6bda92b4d8c6a35d2b09308f558..82fc7fff5fd91f4c46fd22421fdb54c89ccf3871 100644 --- a/bam2wig/pom.xml +++ b/bam2wig/pom.xml @@ -45,6 +45,18 @@ <artifactId>BiopetExtensions</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> </dependencies> </project> \ No newline at end of file diff --git a/bam2wig/src/test/resources/empty.bam b/bam2wig/src/test/resources/empty.bam new file mode 100644 index 0000000000000000000000000000000000000000..bd4073d80cbf73c689e97d2814f9b1fd00d63895 Binary files /dev/null and b/bam2wig/src/test/resources/empty.bam differ diff --git a/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala b/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..eaf2ae37a5d844ba93669f2fef4f0a5d98ab962d --- /dev/null +++ b/bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.pipelines.bamtobigwig + +import java.io.File +import java.nio.file.Paths + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 09/05/16. + */ +class BamToChromSizesTest extends TestNGSuite with Matchers { + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + @Test + def testChromSizes: Unit = { + val bamFile = new File(resourcePath("/empty.bam")) + val bamToChromSizes = new BamToChromSizes(null) + bamToChromSizes.bamFile = bamFile + bamToChromSizes.chromSizesFile = File.createTempFile("chrom.", ".sizes") + bamToChromSizes.chromSizesFile.deleteOnExit() + bamToChromSizes.run() + Source.fromFile(bamToChromSizes.chromSizesFile).getLines().toList shouldBe List("chrQ\t10000", "chrR\t10000") + } +} diff --git a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala index a43cb3890bcf3521e3be6645b4e175c59f84d143..529e6830db48b454499a7344c8606229669acab9 100644 --- a/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala +++ b/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala @@ -30,18 +30,16 @@ import scala.collection.mutable.ListBuffer */ object WriteDependencies extends Logging with Configurable { val root: Configurable = null - private val functionNames: mutable.Map[QFunction, String] = mutable.Map() - - private def createFunctionNames(functions: Seq[QFunction]): Unit = { + private def createFunctionNames(functions: Seq[QFunction]): Map[QFunction, String] = { val cache: mutable.Map[String, Int] = mutable.Map() - for (function <- functions) { + (for (function <- functions) yield { val baseName = function match { case f: Configurable => f.configNamespace case f => f.getClass.getSimpleName } cache += baseName -> (cache.getOrElse(baseName, 0) + 1) - functionNames += function -> s"$baseName-${cache(baseName)}" - } + function -> s"$baseName-${cache(baseName)}" + }).toMap } /** @@ -55,7 +53,7 @@ object WriteDependencies extends Logging with Configurable { val errorOnMissingInput: Boolean = config("error_on_missing_input", false) - createFunctionNames(functions) + val functionNames = createFunctionNames(functions) case class QueueFile(file: File) { private val inputJobs: ListBuffer[QFunction] = ListBuffer() diff --git a/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..b8dee445d737323893c1f5a85dce8b75f7feec72 --- /dev/null +++ b/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala @@ -0,0 +1,60 @@ +package nl.lumc.sasc.biopet.core + +import java.io.File +import java.nio.file.Files + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.function.QFunction +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 09/05/16. + */ +class WriteDependenciesTest extends TestNGSuite with Matchers { + + import WriteDependenciesTest._ + + case class Qfunc(in: Seq[File], out: Seq[File]) extends QFunction { + override def inputs = in + override def outputs = out + override def doneOutputs = out.map(x => new File(x.getParentFile, s".${x.getName}.done")) + override def failOutputs = out.map(x => new File(x.getParentFile, s".${x.getName}.fail")) + jobOutputFile = new File(out.head + ".out") + } + + @Test + def testDeps: Unit = { + val outputFile = File.createTempFile("deps.", ".json") + outputFile.deleteOnExit() + val func1 = Qfunc(file1 :: Nil, file2 :: Nil) + val func2 = Qfunc(file2 :: Nil, file3 :: Nil) + WriteDependencies.writeDependencies(func1 :: func2 :: Nil, outputFile) + val deps = ConfigUtils.fileToConfigMap(outputFile) + deps("jobs") shouldBe a[Map[_, _]] + val jobs = deps("jobs").asInstanceOf[Map[String, Map[String, Any]]] + jobs.count(_._1.contains("Qfunc")) shouldBe 2 + + deps("files") shouldBe a[List[_]] + val files = deps("files").asInstanceOf[List[Map[String, Any]]] + val paths = files.map(x => x.get("path")).flatten + assert(paths.contains(file1.toString)) + assert(paths.contains(file2.toString)) + assert(paths.contains(file3.toString)) + + files.find(_.get("path") == Some(file1.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(true) + files.find(_.get("path") == Some(file2.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(false) + files.find(_.get("path") == Some(file3.toString)).flatMap(_.get("pipeline_input")) shouldBe Some(false) + } +} + +object WriteDependenciesTest { + val tempDir = Files.createTempDirectory("test").toFile + tempDir.deleteOnExit() + val file1 = new File(tempDir, "file1.txt") + val file2 = new File(tempDir, "file2.txt") + val file3 = new File(tempDir, "file3.txt") +} \ No newline at end of file diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index 20b4ae8422936bfc5a8b3b8310074b01b46445a6..281712a2969578652ba11c9ff269771d2d54cfb5 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -18,10 +18,9 @@ package nl.lumc.sasc.biopet.extensions import java.io.File import nl.lumc.sasc.biopet.core.summary.Summarizable -import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.{ Logging, VcfUtils, tryToParseNumber } import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference } -import nl.lumc.sasc.biopet.utils.tryToParseNumber +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.io.Source @@ -164,147 +163,149 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu } /** Returns command to execute */ - def cmdLine = required(executable) + - required(vepScript) + - required("-i", input) + - required("-o", output) + - conditional(v, "-v") + - conditional(q, "-q") + - conditional(offline, "--offline") + - conditional(noProgress, "--no_progress") + - conditional(everything, "--everything") + - conditional(force, "--force_overwrite") + - conditional(noStats, "--no_stats") + - conditional(statsText, "--stats_text") + - conditional(html, "--html") + - conditional(cache, "--cache") + - conditional(humdiv, "--humdiv") + - conditional(regulatory, "--regulatory") + - conditional(cellType, "--cel_type") + - conditional(phased, "--phased") + - conditional(alleleNumber, "--allele_number") + - conditional(numbers, "--numbers") + - conditional(domains, "--domains") + - conditional(noEscape, "--no_escape") + - conditional(hgvs, "--hgvs") + - conditional(protein, "--protein") + - conditional(symbol, "--symbol") + - conditional(ccds, "--ccds") + - conditional(uniprot, "--uniprot") + - conditional(tsl, "--tsl") + - conditional(canonical, "--canonical") + - conditional(biotype, "--biotype") + - conditional(xrefRefseq, "--xref_refseq") + - conditional(checkExisting, "--check_existing") + - conditional(checkAlleles, "--check_alleles") + - conditional(checkSvs, "--check_svs") + - conditional(gmaf, "--gmaf") + - conditional(maf1kg, "--maf_1kg") + - conditional(mafEsp, "--maf_esp") + - conditional(pubmed, "--pubmed") + - conditional(vcf, "--vcf") + - conditional(json, "--json") + - conditional(gvf, "--gvf") + - conditional(checkRef, "--check_ref") + - conditional(codingOnly, "--coding_only") + - conditional(noIntergenic, "--no_intergenic") + - conditional(pick, "--pick") + - conditional(pickAllele, "--pick_allele") + - conditional(flagPick, "--flag_pick") + - conditional(flagPickAllele, "--flag_pick_allele") + - conditional(perGene, "--per_gene") + - conditional(mostSevere, "--most_severe") + - conditional(summary, "--summary") + - conditional(filterCommon, "--filter_common") + - conditional(checkFrequency, "--check_frequency") + - conditional(allowNonVariant, "--allow_non_variant") + - conditional(database, "--database") + - conditional(genomes, "--genomes") + - conditional(gencodeBasic, "--gencode_basic") + - conditional(refseq, "--refseq") + - conditional(merged, "--merged") + - conditional(allRefseq, "--all_refseq") + - conditional(lrg, "--lrg") + - conditional(noWholeGenome, "--no_whole_genome") + - conditional(skibDbCheck, "--skip_db_check") + - optional("--config", vepConfig) + - optional("--species", species) + - optional("--assembly", assembly) + - optional("--format", format) + - optional("--dir", dir) + - optional("--dir_cache", dirCache) + - optional("--dir_plugins", dirPlugins) + - optional("--fasta", fasta) + - optional("--sift", sift) + - optional("--polyphen", polyphen) + - repeat("--custom", custom) + - repeat("--plugin", plugin) + - optional("--individual", individual) + - optional("--fields", fields) + - optional("--convert", convert) + - optional("--terms", terms) + - optional("--chr", chr) + - optional("--pick_order", pickOrder) + - optional("--freq_pop", freqPop) + - optional("--freq_gt_lt", freqGtLt) + - optional("--freq_filter", freqFilter) + - optional("--filter", filter) + - optional("--host", host) + - optional("--user", user) + - optional("--password", password) + - optional("--registry", registry) + - optional("--build", build) + - optional("--compress", compress) + - optional("--cache_region_size", cacheRegionSize) + - optional("--fork", threads) + - optional("--cache_version", cacheVersion) + - optional("--freq_freq", freqFreq) + - optional("--port", port) + - optional("--db_version", dbVersion) + - optional("--buffer_size", bufferSize) + - optional("--failed", failed) + def cmdLine = { + if (input.exists() && VcfUtils.vcfFileIsEmpty(input)) { + val zcat = Zcat(this, input, output) + zcat.cmdLine + } else required(executable) + + required(vepScript) + + required("-i", input) + + required("-o", output) + + conditional(v, "-v") + + conditional(q, "-q") + + conditional(offline, "--offline") + + conditional(noProgress, "--no_progress") + + conditional(everything, "--everything") + + conditional(force, "--force_overwrite") + + conditional(noStats, "--no_stats") + + conditional(statsText, "--stats_text") + + conditional(html, "--html") + + conditional(cache, "--cache") + + conditional(humdiv, "--humdiv") + + conditional(regulatory, "--regulatory") + + conditional(cellType, "--cel_type") + + conditional(phased, "--phased") + + conditional(alleleNumber, "--allele_number") + + conditional(numbers, "--numbers") + + conditional(domains, "--domains") + + conditional(noEscape, "--no_escape") + + conditional(hgvs, "--hgvs") + + conditional(protein, "--protein") + + conditional(symbol, "--symbol") + + conditional(ccds, "--ccds") + + conditional(uniprot, "--uniprot") + + conditional(tsl, "--tsl") + + conditional(canonical, "--canonical") + + conditional(biotype, "--biotype") + + conditional(xrefRefseq, "--xref_refseq") + + conditional(checkExisting, "--check_existing") + + conditional(checkAlleles, "--check_alleles") + + conditional(checkSvs, "--check_svs") + + conditional(gmaf, "--gmaf") + + conditional(maf1kg, "--maf_1kg") + + conditional(mafEsp, "--maf_esp") + + conditional(pubmed, "--pubmed") + + conditional(vcf, "--vcf") + + conditional(json, "--json") + + conditional(gvf, "--gvf") + + conditional(checkRef, "--check_ref") + + conditional(codingOnly, "--coding_only") + + conditional(noIntergenic, "--no_intergenic") + + conditional(pick, "--pick") + + conditional(pickAllele, "--pick_allele") + + conditional(flagPick, "--flag_pick") + + conditional(flagPickAllele, "--flag_pick_allele") + + conditional(perGene, "--per_gene") + + conditional(mostSevere, "--most_severe") + + conditional(summary, "--summary") + + conditional(filterCommon, "--filter_common") + + conditional(checkFrequency, "--check_frequency") + + conditional(allowNonVariant, "--allow_non_variant") + + conditional(database, "--database") + + conditional(genomes, "--genomes") + + conditional(gencodeBasic, "--gencode_basic") + + conditional(refseq, "--refseq") + + conditional(merged, "--merged") + + conditional(allRefseq, "--all_refseq") + + conditional(lrg, "--lrg") + + conditional(noWholeGenome, "--no_whole_genome") + + conditional(skibDbCheck, "--skip_db_check") + + optional("--config", vepConfig) + + optional("--species", species) + + optional("--assembly", assembly) + + optional("--format", format) + + optional("--dir", dir) + + optional("--dir_cache", dirCache) + + optional("--dir_plugins", dirPlugins) + + optional("--fasta", fasta) + + optional("--sift", sift) + + optional("--polyphen", polyphen) + + repeat("--custom", custom) + + repeat("--plugin", plugin) + + optional("--individual", individual) + + optional("--fields", fields) + + optional("--convert", convert) + + optional("--terms", terms) + + optional("--chr", chr) + + optional("--pick_order", pickOrder) + + optional("--freq_pop", freqPop) + + optional("--freq_gt_lt", freqGtLt) + + optional("--freq_filter", freqFilter) + + optional("--filter", filter) + + optional("--host", host) + + optional("--user", user) + + optional("--password", password) + + optional("--registry", registry) + + optional("--build", build) + + optional("--compress", compress) + + optional("--cache_region_size", cacheRegionSize) + + optional("--fork", threads) + + optional("--cache_version", cacheVersion) + + optional("--freq_freq", freqFreq) + + optional("--port", port) + + optional("--db_version", dbVersion) + + optional("--buffer_size", bufferSize) + + optional("--failed", failed) + } def summaryFiles: Map[String, File] = Map() def summaryStats: Map[String, Any] = { - if (statsText) { - val statsFile: File = new File(output.getAbsolutePath + "_summary.txt") - parseStatsFile(statsFile) - } else { - Map() - } + val statsFile = new File(output.getAbsolutePath + "_summary.txt") + if (statsText && statsFile.exists()) parseStatsFile(statsFile) + else Map() } - def parseStatsFile(file: File): Map[String, Any] = { - val contents = Source.fromFile(file).getLines().toList - val headers = getHeadersFromStatsFile(contents) - headers.foldLeft(Map.empty[String, Any])((acc, x) => acc + (x.replace(" ", "_") -> getBlockFromStatsFile(contents, x))) - } + protected val removeOnConflict = Set("Output_file", "Command_line_options", "Run_time", "Start_time", "End_time", "Input_file_(format)", "Novel_/_existing_variants") + protected val nonNumber = Set("VEP_version_(API)", "Cache/Database", "Species") - def getBlockFromStatsFile(contents: List[String], header: String): Map[String, Any] = { - var inBlock = false - var theMap: Map[String, Any] = Map() - for (x <- contents) { - val stripped = x.stripPrefix("[").stripSuffix("]") - if (stripped == header) { - inBlock = true - } else { - if (inBlock) { - val key = stripped.split('\t').head.replace(" ", "_") - val value = stripped.split('\t').last - theMap ++= Map(key -> tryToParseNumber(value, fallBack = true).getOrElse(value)) - } - } - if (stripped == "") { - inBlock = false + override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { + if (removeOnConflict.contains(key)) None + else if (nonNumber.contains(key)) v1 + else { + (v1, v2) match { + case (x1: Int, x2: Int) => x1 + x2 + case _ => throw new IllegalStateException(s"Value are not Int's, unable to sum them up, key: $key, v1: $v1, v2: $v2") } } - theMap } - def getHeadersFromStatsFile(contents: List[String]): List[String] = { - // block headers are of format '[block]' - contents.filter(_.startsWith("[")).filter(_.endsWith("]")).map(_.stripPrefix("[")).map(_.stripSuffix("]")) - } + def parseStatsFile(file: File): Map[String, Any] = { + val reader = Source.fromFile(file) + val contents = reader.getLines().filter(_ != "").toArray + reader.close() + + def isHeader(line: String) = line.startsWith("[") && line.endsWith("]") + val headers = contents.zipWithIndex + .filter(x => x._1.startsWith("[") && x._1.endsWith("]")) + + (for ((header, headerIndex) <- headers) yield { + val name = header.stripPrefix("[").stripSuffix("]") + name.replaceAll(" ", "_") -> (contents.drop(headerIndex + 1).takeWhile(!isHeader(_)).map { line => + val values = line.split("\t", 2) + values.head.replaceAll(" ", "_") -> tryToParseNumber(values.last).getOrElse(0) + }.toMap) + }).toMap + } } diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala index 4d712a8407abb8f09b8e6e7fdcceaba8d11d2bb4..904a0fc08069a53954b7d0525e86d18f199ed37b 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -2,11 +2,11 @@ package nl.lumc.sasc.biopet.extensions.gatk import java.io.File -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, Reference } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output } -class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction { +class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { analysisName = "CatVariants" javaMainClass = "org.broadinstitute.gatk.tools.CatVariants" @@ -44,6 +44,11 @@ class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction]) var log_to_file: File = _ + override def beforeGraph() = { + super.beforeGraph() + if (reference == null) reference = referenceFasta() + } + override def cmdLine = super.cmdLine + required("-R", reference, spaceSeparated = true, escape = true, format = "%s") + repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala index ed0065e589e17000a4e4fb9742d4bca71f6622a1..1460f3f6de300d2fed31e573d7d9ea49aa77d854 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala @@ -69,7 +69,7 @@ class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterG override def cmdLine = super.cmdLine + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + conditional(convertToBasePairResolution, "-bpResolution", escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala index 650340d63c1be0ba9195609a616ddddb4abee8ef..ac939b07842e301928a4daf5d9528318b9595bf5 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala @@ -106,16 +106,12 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter @Gather(enabled = false) private var outputIndex: File = _ - @Output - @Gather(enabled = false) - private var dbsnpIndex: File = _ - override def beforeGraph() { super.beforeGraph() deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig)) if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out)) outputIndex = VcfUtils.getVcfIndexFile(out) - dbsnp.foreach(x => dbsnpIndex = VcfUtils.getVcfIndexFile(x)) + dbsnp.foreach(x => deps :+= VcfUtils.getVcfIndexFile(x)) } override def cmdLine = super.cmdLine + @@ -133,7 +129,7 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") + repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") + conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") + conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s") diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index 9eac2ba9e3d4974e7b479addcb3d0f1dadf5ef56..403bee79fea30f6ec521ce9dd36927be745f8640 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -419,7 +419,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + optional("-likelihoodEngine", likelihoodCalculationEngine, spaceSeparated = true, escape = true, format = "%s") + optional("-hksr", heterogeneousKmerSizeResolution, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + conditional(dontTrimActiveRegions, "-dontTrimActiveRegions", escape = true, format = "%s") + optional("-maxDiscARExtension", maxDiscARExtension, spaceSeparated = true, escape = true, format = "%s") + optional("-maxGGAARExtension", maxGGAARExtension, spaceSeparated = true, escape = true, format = "%s") + @@ -444,7 +444,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles.getOrElse(null)), alleles, spaceSeparated = true, escape = true, format = "%s") + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala index a1ed7b732f9b72d1660c9ea8c1995e5fc0137a68..37885ac9c1958635d532e4ad2fc70ee5b9817cfb 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala @@ -218,8 +218,8 @@ class SelectVariants(val root: Configurable) extends CommandLineGATK with Scatte override def cmdLine = super.cmdLine + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-disc", discordance.getOrElse(null)), discordance, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-conc", concordance.getOrElse(null)), concordance, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") + repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index 0edfe5260fe2fb9101fd92d01fbcebce94ba0441..42daf5e64018c1c8c51582cb3f2bbf9844bf1e37 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -276,7 +276,7 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat conditional(ignoreSNPAlleles, "-ignoreSNPAlleles", escape = true, format = "%s") + conditional(allReadsSP, "-dl", escape = true, format = "%s") + conditional(ignoreLaneInfo, "-ignoreLane", escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-referenceCalls", reference_sample_calls.getOrElse(null)), reference_sample_calls, spaceSeparated = true, escape = true, format = "%s") + optional("-refsample", reference_sample_name, spaceSeparated = true, escape = true, format = "%s") + optional("-minqs", min_quality_score, spaceSeparated = true, escape = true, format = "%s") + optional("-maxqs", max_quality_score, spaceSeparated = true, escape = true, format = "%s") + @@ -291,14 +291,14 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat repeat("-inputPrior", input_prior, spaceSeparated = true, escape = true, format = "%s") + optional("-ploidy", sample_ploidy, spaceSeparated = true, escape = true, format = "%s") + optional("-gt_mode", genotyping_mode, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-alleles", alleles), alleles, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-alleles", alleles.getOrElse(null)), alleles, spaceSeparated = true, escape = true, format = "%s") + optional("-contamination", contamination_fraction_to_filter, spaceSeparated = true, escape = true, format = contamination_fraction_to_filterFormat) + optional("-contaminationFile", contamination_fraction_per_sample_file, spaceSeparated = true, escape = true, format = "%s") + optional("-pnrm", p_nonref_model, spaceSeparated = true, escape = true, format = "%s") + optional("-logExactCalls", exactcallslog, spaceSeparated = true, escape = true, format = "%s") + optional("-out_mode", output_mode, spaceSeparated = true, escape = true, format = "%s") + conditional(allSitePLs, "-allSitePLs", escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + repeat("-onlyEmitSamples", onlyEmitSamples, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala index d98a55a49eb6e34c00588fddde66dc00cddfe610..0e35d9b81050cddd8d762e340c2f984bdd9c59ec 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala @@ -104,8 +104,8 @@ class VariantAnnotator(val root: Configurable) extends CommandLineGATK with Scat override def cmdLine = super.cmdLine + required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile), snpEffFile, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-snpEffFile", snpEffFile.getOrElse()), snpEffFile, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse()), dbsnp, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + repeat("-resource", resource, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala index cf1c362c7dd06b2eec25104fdfd3fb5a06dea2f8..182007076b311d97ff8c58e404eb498bc0f2b90b 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala @@ -135,8 +135,8 @@ class VariantEval(val root: Configurable) extends CommandLineGATK { optional("-o", out, spaceSeparated = true, escape = true, format = "%s") + repeat("-eval", eval, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-D", dbsnp), dbsnp, spaceSeparated = true, escape = true, format = "%s") + - optional(TaggedFile.formatCommandLineParameter("-gold", goldStandard), goldStandard, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-D", dbsnp.getOrElse(null)), dbsnp, spaceSeparated = true, escape = true, format = "%s") + + optional(TaggedFile.formatCommandLineParameter("-gold", goldStandard.getOrElse(null)), goldStandard, spaceSeparated = true, escape = true, format = "%s") + repeat("-select", select_exps, spaceSeparated = true, escape = true, format = "%s") + repeat("-selectName", select_names, spaceSeparated = true, escape = true, format = "%s") + repeat("-sn", sample, spaceSeparated = true, escape = true, format = "%s") + diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 71f7e2b12f831e699e991af51792692e43abf68a..6c046a2b4002336fed7f031d2ab1d5e3295a4b31 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -24,6 +24,7 @@ import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, Variant import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.VcfUtils.scalaListToJavaObjectArrayList +import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck import scala.collection.JavaConversions._ @@ -89,14 +90,14 @@ object VcfWithVcf extends ToolCommand { val header = reader.getFileHeader val vcfDict = header.getSequenceDictionary match { case r if r != null => - r.assertSameDictionary(referenceDict) + r.assertSameDictionary(referenceDict, true) r case _ => referenceDict } val secondHeader = secondaryReader.getFileHeader secondHeader.getSequenceDictionary match { - case r if r != null => r.assertSameDictionary(referenceDict) + case r if r != null => r.assertSameDictionary(referenceDict, true) case _ => } @@ -123,6 +124,7 @@ object VcfWithVcf extends ToolCommand { var counter = 0 for (record <- reader) { + require(vcfDict.getSequence(record.getContig) != null, s"Contig ${record.getContig} does not exist on reference") val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele) val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords) @@ -201,7 +203,6 @@ object VcfWithVcf extends ToolCommand { } case FieldMethod.unique => scalaListToJavaObjectArrayList(attribute._2.distinct) case _ => { - print(attribute._2.getClass.toString) scalaListToJavaObjectArrayList(attribute._2) } }) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 50893046b949792ddccedf00752b34c632dc00ee..89d87c2825ee2b730d698deea8b6e3cad5a1dd27 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala @@ -24,7 +24,6 @@ import htsjdk.variant.vcf._ import nl.lumc.sasc.biopet.utils.ToolCommand import scala.collection.JavaConversions._ -import scala.collection.mutable.{ Map => MMap } /** * This tool parses a VEP annotated VCF into a standard VCF file. @@ -57,31 +56,37 @@ object VepNormalizer extends ToolCommand { } val header = reader.getFileHeader - logger.debug("Checking for CSQ tag") - csqCheck(header) - logger.debug("CSQ tag OK") - logger.debug("Checkion VCF version") - versionCheck(header) - logger.debug("VCF version OK") - logger.debug("Parsing header") - val newInfos = parseCsq(header) - header.setWriteCommandLine(true) val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder(). setOutputFile(output).setReferenceDictionary(header.getSequenceDictionary) build ()) - for (info <- newInfos) { - val tmpheaderline = new VCFInfoHeaderLine(info, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A VEP annotation") - header.addMetaDataLine(tmpheaderline) - } - logger.debug("Header parsing done") + if (reader.iterator().hasNext) { + logger.debug("Checking for CSQ tag") + csqCheck(header) + logger.debug("CSQ tag OK") + logger.debug("Checkion VCF version") + versionCheck(header) + logger.debug("VCF version OK") + logger.debug("Parsing header") + val newInfos = parseCsq(header) + header.setWriteCommandLine(true) + + for (info <- newInfos) { + val tmpheaderline = new VCFInfoHeaderLine(info, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A VEP annotation") + header.addMetaDataLine(tmpheaderline) + } + logger.debug("Header parsing done") - logger.debug("Writing header to file") + logger.debug("Writing header to file") - writer.writeHeader(header) - logger.debug("Wrote header to file") + writer.writeHeader(header) + logger.debug("Wrote header to file") - normalize(reader, writer, newInfos, commandArgs.mode, commandArgs.removeCSQ) + normalize(reader, writer, newInfos, commandArgs.mode, commandArgs.removeCSQ) + } else { + logger.debug("No variants found, skipping normalize step") + writer.writeHeader(header) + } writer.close() logger.debug("Closed writer") reader.close() @@ -91,6 +96,7 @@ object VepNormalizer extends ToolCommand { /** * Normalizer + * * @param reader input VCF VCFFileReader * @param writer output VCF AsyncVariantContextWriter * @param newInfos array of string containing names of new info fields @@ -118,6 +124,7 @@ object VepNormalizer extends ToolCommand { /** * Checks whether header has a CSQ tag + * * @param header VCF header */ def csqCheck(header: VCFHeader) = { @@ -131,6 +138,7 @@ object VepNormalizer extends ToolCommand { * Checks whether version of input VCF is at least 4.0 * VEP is known to cause issues below 4.0 * Throws exception if not + * * @param header VCFHeader of input VCF */ def versionCheck(header: VCFHeader) = { @@ -149,6 +157,7 @@ object VepNormalizer extends ToolCommand { /** * Parses the CSQ tag in the header + * * @param header the VCF header * @return list of strings with new info fields */ @@ -160,6 +169,7 @@ object VepNormalizer extends ToolCommand { /** * Explode a single VEP-annotated record to multiple normal records * Based on the number of annotated transcripts in the CSQ tag + * * @param record the record as a VariantContext object * @param csqInfos An array with names of new info tags * @return An array with the new records diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index df8068c0c7e28b57e3344d736aa7cc04abe2b8f2..2cedc0ef3bddbb81d634c25c466d22843ccd1ed2 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.utils import java.io.File -import htsjdk.samtools.{ SamReader, SamReaderFactory } +import htsjdk.samtools.{ SAMSequenceDictionary, SamReader, SamReaderFactory } import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList } import scala.collection.JavaConversions._ @@ -129,11 +129,8 @@ object BamUtils { val counts = bamInsertSizes.flatMap(x => x) // avoid division by zero - if (counts.size != 0) { - counts.sum / counts.size - } else { - 0 - } + if (counts.size != 0) counts.sum / counts.size + else 0 } /** @@ -146,4 +143,21 @@ object BamUtils { bamFile -> sampleBamInsertSize(bamFile, samplingSize, binSize) }.toMap + /** This class will add functionality to [[SAMSequenceDictionary]] */ + implicit class SamDictCheck(samDics: SAMSequenceDictionary) { + /** + * This method will check if all contig and sizes are the same without looking at the order of the contigs + * + * @throws AssertionError + * @param that Dict to compare to + * @param ignoreOrder When true the order of the contig does not matter + */ + def assertSameDictionary(that: SAMSequenceDictionary, ignoreOrder: Boolean): Unit = { + if (ignoreOrder) { + assert(samDics.getReferenceLength == that.getReferenceLength) + val thisContigNames = samDics.getSequences.map(x => (x.getSequenceName, x.getSequenceLength)).sorted.toSet + assert(thisContigNames == that.getSequences.map(x => (x.getSequenceName, x.getSequenceLength)).sorted.toSet) + } else samDics.assertSameDictionary(that) + } + } } diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 5b100606f0dc00bcd637c667871cb5c2d8da99cd..d4a05a7fd9beb48d01040f6188cf67bdef080043 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -134,4 +134,11 @@ object VcfUtils { else if (name.endsWith(".vcf.gz")) new File(name + ".tbi") else throw new IllegalArgumentException(s"File given is no vcf file: $vcfFile") } + + def vcfFileIsEmpty(file: File): Boolean = { + val reader = new VCFFileReader(file, false) + val hasNext = reader.iterator().hasNext + reader.close() + !hasNext + } } diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala index 6b863f81d16d4f26754ea87ddf5c703f35fd4588..301fdf1f31bb8475c1092fb76b69e2c667448ec1 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala @@ -61,7 +61,7 @@ class Summary(file: File) { } /** Executes given function for each sample */ - def getSampleValues(function: (Summary, String) => Option[Any]): Map[String, Option[Any]] = { + def getSampleValues[T](function: (Summary, String) => Option[T]): Map[String, Option[T]] = { (for (sample <- samples) yield sample -> function(this, sample)).toMap } @@ -95,7 +95,7 @@ class Summary(file: File) { * @param function Function to execute * @return (sampleId, libId) -> value */ - def getLibraryValues(function: (Summary, String, String) => Option[Any]): Map[(String, String), Option[Any]] = { + def getLibraryValues[T](function: (Summary, String, String) => Option[T]): Map[(String, String), Option[T]] = { (for (sample <- samples; lib <- libraries.getOrElse(sample, Set())) yield { (sample, lib) -> function(this, sample, lib) }).toMap diff --git a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala index dbce03915e2ed8512e9e737f9189147670b76a5e..9ad67419137321027aceaa14a88781a32c74f0ff 100644 --- a/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala +++ b/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala @@ -33,7 +33,6 @@ case class SummaryValue(value: Option[Any]) { }) } - //TODO: Calculations are not yet used somewhere, needs more testing def +(that: SummaryValue): SummaryValue = { (this.value, that.value) match { case (Some(a: Double), Some(b)) => SummaryValue(Some(a + b.toString.toDouble)) diff --git a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..cd319fee6bcb0da8bccb7a1c474f1c566ab0dab5 --- /dev/null +++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala @@ -0,0 +1,81 @@ +package nl.lumc.sasc.biopet.utils + +import java.io.{ File, FileNotFoundException, PrintWriter } +import java.nio.file.Files + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by pjvanthof on 05/05/16. + */ +class IoUtilsTest extends TestNGSuite with Matchers { + + def createTempTestFile(file: File): Unit = { + file.getParentFile.mkdirs() + val writer = new PrintWriter(file) + writer.println("test") + writer.close() + file.deleteOnExit() + } + + @Test + def testCopyFile: Unit = { + val temp1 = File.createTempFile("test.", ".txt") + temp1.deleteOnExit() + val temp2 = File.createTempFile("test.", ".txt") + temp2.deleteOnExit() + createTempTestFile(temp1) + IoUtils.copyFile(temp1, temp2) + val reader = Source.fromFile(temp2) + reader.getLines().toList shouldBe List("test") + reader.close() + } + + @Test + def testCopyFileNonExistingDir: Unit = { + val temp1 = File.createTempFile("test.", ".txt") + val tempDir = new File(Files.createTempDirectory("test").toFile, "non-exist") + tempDir.deleteOnExit() + tempDir shouldNot exist + val temp2 = new File(tempDir, "test.txt") + createTempTestFile(temp1) + intercept[FileNotFoundException] { + IoUtils.copyFile(temp1, temp2) + } + IoUtils.copyFile(temp1, temp2, true) + val reader = Source.fromFile(temp2) + reader.getLines().toList shouldBe List("test") + reader.close() + } + + @Test + def testCopyDir: Unit = { + val tempDir1 = Files.createTempDirectory("test").toFile + tempDir1.deleteOnExit() + val tempDir2 = Files.createTempDirectory("test").toFile + tempDir2.deleteOnExit() + val relativePaths: List[String] = List( + "test1.txt", + "test2.txt", + "dir1" + File.separator + "test1.txt", + "dir1" + File.separator + "test2.txt", + "dir2" + File.separator + "test1.txt", + "dir2" + File.separator + "test2.txt") + relativePaths.foreach { x => + createTempTestFile(new File(tempDir1, x)) + new File(tempDir2, x) shouldNot exist + } + IoUtils.copyDir(tempDir1, tempDir2) + relativePaths.foreach { x => + val file = new File(tempDir2, x) + file should exist + val reader = Source.fromFile(file) + reader.getLines().toList shouldBe List("test") + reader.close() + } + } +} diff --git a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..45f650496cfa4e04784c4b4a9748d5b6ab18be5f --- /dev/null +++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryTest.scala @@ -0,0 +1,96 @@ +package nl.lumc.sasc.biopet.utils.summary + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 06/05/16. + */ +class SummaryTest extends TestNGSuite with Matchers { + @Test + def testSamples: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.samples shouldBe Set("sample1") + } + + @Test + def testLibraries: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.libraries shouldBe Map("sample1" -> Set("lib1")) + } + + @Test + def testValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getValue("key_1") shouldBe Some("test_1") + summary.getValue("key_x") shouldBe None + summary.getValue("samples", "sample1", "key_2") shouldBe Some("test_2") + summary.getValue("samples", "sample1", "libraries", "lib1", "key_3") shouldBe Some("test_3") + + summary.getValue(Some("sample1"), None, "key_2") shouldBe Some("test_2") + summary.getValue(Some("sample1"), None, "key_x") shouldBe None + summary.getValue(Some("sample1"), Some("lib1"), "key_3") shouldBe Some("test_3") + summary.getValue(Some("sample1"), Some("lib1"), "key_x") shouldBe None + summary.getValue(None, None, "key_1") shouldBe Some("test_1") + summary.getValue(None, None, "key_x") shouldBe None + } + + @Test + def testSampleValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getSampleValue("sample1", "key_2") shouldBe Some("test_2") + summary.getSampleValue("sample1", "key_x") shouldBe None + summary.getSampleValue("samplex", "key_x") shouldBe None + } + + @Test + def testSampleValues: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getSampleValues("key_2") shouldBe Map("sample1" -> Some("test_2")) + summary.getSampleValues("key_x") shouldBe Map("sample1" -> None) + summary.getSampleValues((summary, sample) => summary.getSampleValue(sample, "key_2")) shouldBe Map("sample1" -> Some("test_2")) + + } + + @Test + def testLibraryValue: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getLibraryValue("sample1", "lib1", "key_3") shouldBe Some("test_3") + summary.getLibraryValue("sample1", "lib1", "key_x") shouldBe None + summary.getLibraryValue("samplex", "libx", "key_x") shouldBe None + } + + @Test + def testLibraryValues: Unit = { + val summary = new Summary(SummaryTest.testSummaryFile) + summary.getLibraryValues("key_3") shouldBe Map(("sample1", "lib1") -> Some("test_3")) + summary.getLibraryValues("key_x") shouldBe Map(("sample1", "lib1") -> None) + summary.getLibraryValues((summary, sample, lib) => summary.getLibraryValue(sample, lib, "key_3")) shouldBe Map(("sample1", "lib1") -> Some("test_3")) + } + +} + +object SummaryTest { + val testSummary = Map( + "key_1" -> "test_1", + "samples" -> Map( + "sample1" -> Map( + "key_2" -> "test_2", + "libraries" -> Map( + "lib1" -> Map("key_3" -> "test_3") + ) + ) + ) + ) + + val testSummaryFile = File.createTempFile("summary.", ".json") + testSummaryFile.deleteOnExit() + + val writer = new PrintWriter(testSummaryFile) + writer.println(ConfigUtils.mapToJson(testSummary).nospaces) + writer.close() +} diff --git a/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..f2996d522025debf22430cffdc3feae29e7b0737 --- /dev/null +++ b/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValueTest.scala @@ -0,0 +1,95 @@ +package nl.lumc.sasc.biopet.utils.summary + +import java.io.{ File, PrintWriter } + +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 06/05/16. + */ +class SummaryValueTest extends TestNGSuite with Matchers { + @Test + def testConstructor: Unit = { + val summary = new Summary(SummaryValueTest.testSummaryFile) + new SummaryValue(None).value shouldBe None + new SummaryValue(Some(1)).value shouldBe Some(1) + new SummaryValue(List("key_1"), summary, None, None).value shouldBe Some("test_1") + new SummaryValue(List("key_2"), summary, Some("sample1"), None).value shouldBe Some("test_2") + new SummaryValue(List("key_3"), summary, Some("sample1"), Some("lib1")).value shouldBe Some("test_3") + } + + @Test + def testPlus: Unit = { + new SummaryValue(Some(1.0)) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1.0)) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1)) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("1")) + new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("1")) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) + new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testMin: Unit = { + new SummaryValue(Some(1.0)) - new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some(1.0)) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some(1)) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("1")) - new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(Some("1")) - new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0.0)) + new SummaryValue(None) - new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testMultiply: Unit = { + new SummaryValue(Some(1.0)) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1.0)) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(1)) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("1")) * new SummaryValue(Some(2)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("1")) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) * new SummaryValue(Some(2.0)) shouldBe new SummaryValue(None) + } + + @Test + def testDivide: Unit = { + new SummaryValue(Some(2.0)) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(2.0)) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some(2)) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2)) + new SummaryValue(Some("2")) / new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(Some("2")) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(2.0)) + new SummaryValue(None) / new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + + @Test + def testLeft: Unit = { + new SummaryValue(Some(2.0)) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some(2.0)) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some(2)) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("2")) % new SummaryValue(Some(1)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(Some("2")) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(Some(0)) + new SummaryValue(None) % new SummaryValue(Some(1.0)) shouldBe new SummaryValue(None) + } + +} + +object SummaryValueTest { + val testSummary = Map( + "key_1" -> "test_1", + "samples" -> Map( + "sample1" -> Map( + "key_2" -> "test_2", + "libraries" -> Map( + "lib1" -> Map("key_3" -> "test_3") + ) + ) + ) + ) + + val testSummaryFile = File.createTempFile("summary.", ".json") + testSummaryFile.deleteOnExit() + + val writer = new PrintWriter(testSummaryFile) + writer.println(ConfigUtils.mapToJson(testSummary).nospaces) + writer.close() +} diff --git a/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index 8d3e47bdd9182a24d493c9083944f2fe98dc5287..d6b0f8c1d4f96a3c5af267341336cc8d0f2c452f 100644 --- a/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -58,7 +58,7 @@ class Carp(val root: Configurable) extends QScript with MultisampleMappingTrait override def makeSample(id: String) = new Sample(id) class Sample(sampleId: String) extends super.Sample(sampleId) { - override def preProcessBam = Some(createFile(".filter.bam")) + override def preProcessBam = Some(createFile("filter.bam")) val controls: List[String] = config("control", default = Nil) diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index de0690dbba6a19e279e89113dec4a44729ea9886..1a8df14734eccaa8adecdb92401cc2bcd04e3715 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -35,6 +35,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r /** Allow reporting of all found (potentially adapter) sequences in the FastQC */ var sensitiveAdapterSearch: Boolean = config("sensitiveAdapterSearch", default = false) + var enableRCtrimming: Boolean = config("enableRCtrimming", default = false) /** Class for storing a single FastQC module result */ protected case class FastQCModule(name: String, status: String, lines: Seq[String]) @@ -188,6 +189,11 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r val fromKnownList: Set[AdapterSequence] = (adapterSet ++ contaminantSet) .filter(x => foundAdapterNames.exists(_.startsWith(x.name))) + val fromKnownListRC: Set[AdapterSequence] = if (enableRCtrimming) fromKnownList.map { + x => AdapterSequence(x.name + "_RC", reverseComplement(x.seq)) + } + else Set.empty + // list all sequences found by FastQC val fastQCFoundSequences: Seq[AdapterSequence] = if (sensitiveAdapterSearch) { qcModules.get("Overrepresented sequences") match { diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala index 2b537d9767cbc1ddbd9f2e528a1c122dfe973d7c..b9f85239a5e7460708d47dd049d44dcab9878e41 100644 --- a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala @@ -27,7 +27,7 @@ class CutadaptTest extends FastqcV0101Test { val fqc = new Fastqc(null) fqc.output = outputv0101 fqc.contaminants = Option(resourceFile("fqc_contaminants_v0112.txt")) - // fqc.beforeGraph() + fqc.enableRCtrimming = true fqc } diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala index 3cf24e8c60a570e8e51fe528ece4f81d0b66a01a..b519dc2f04a8ca38b3340d259c323b7d15747078 100644 --- a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala +++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala @@ -73,11 +73,15 @@ class FastqcV0101Test extends TestNGSuite with Matchers { fqc.contaminants = Option(resourceFile("fqc_contaminants_v0101.txt")) // found adapters also contain the adapters in reverse complement (done within flexiprep/fastqc only) val adapters = fqc.foundAdapters - // we find 1 adapter which comes with the Reverse Complement counterpart - adapters.size shouldBe 2 - adapters.head.name shouldEqual "TruSeq Adapter, Index 1_RC" - adapters.head.seq shouldEqual "CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC" + if (fqc.enableRCtrimming) { + // we find 1 adapter which comes with the Reverse Complement counterpart + adapters.size shouldBe 2 + adapters.head.name shouldEqual "TruSeq Adapter, Index 1_RC" + adapters.head.seq shouldEqual "CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC" + } else { + adapters.size shouldBe 1 + } adapters.last.name shouldEqual "TruSeq Adapter, Index 1" adapters.last.seq shouldEqual "GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG" diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala index ed0e1318d96c615346172b5e1add0df6dc4476d0..582afcab9af84048cf5393341243a6aca234cc2e 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala @@ -178,7 +178,7 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait annotation.foreach { toucan => toucan.outputDir = new File(outputDir, "annotation") - toucan.inputVCF = vc.finalFile + toucan.inputVcf = vc.finalFile add(toucan) } }) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala index c38cb8564ddb0f9243443bab60f6cb41ceec5b53..4f84d061198ee9449811cd414c72e661ee1e501b 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala @@ -73,7 +73,7 @@ class ShivaVariantcalling(val root: Configurable) extends QScript /** Variantcallers requested by the config */ protected val configCallers: Set[String] = config("variantcallers") - protected val callers: List[Variantcaller] = { + val callers: List[Variantcaller] = { (for (name <- configCallers) yield { if (!callersList.exists(_.name == name)) Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala index 2f7b8446b4c0a64b1348edb396f43fedcaaab4d3..a08703e95dc4ce61437641df80dd3c8da1641394 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala @@ -13,14 +13,21 @@ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { val name = "haplotypecaller_gvcf" protected def defaultPrio = 5 + /** + * Map of sample name -> gvcf. May be empty. + */ + protected var gVcfFiles: Map[String, File] = Map() + + def getGvcfs = gVcfFiles + def biopetScript() { val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { val hc = gatk.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) add(hc) - hc.out + sample -> hc.out } - val genotypeGVCFs = gatk.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + val genotypeGVCFs = gatk.GenotypeGVCFs(this, gvcfFiles.values.toList, outputFile) add(genotypeGVCFs) } } diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala index a2e13a46602dc9ea6bee2b3cfd19e2c251d8f905..c484171187fc31c27a06c7bd69d10433c9a9bb86 100644 --- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala +++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala @@ -28,6 +28,9 @@ trait Variantcaller extends QScript with BiopetQScript with Reference { var namePrefix: String = _ + /** + * Map of samplename -> (preprocessed) bam file + */ var inputBams: Map[String, File] = _ def init() = {} diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index eb1d40ece24ce6862023eae3343b84c840ea3e42..976fa91996d820ff38fcf22357a358f46277a848 100644 --- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -58,7 +58,9 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { def multisampleCalling: Boolean = true def sampleCalling = false def libraryCalling = false - def dbsnp: Boolean = true + def dbsnp = true + def svCalling = false + def annotation = false @Test(dataProvider = "shivaOptions") def testShiva(f: String, sample1: Boolean, sample2: Boolean, @@ -73,7 +75,9 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { "single_sample_variantcalling" -> sampleCalling, "library_variantcalling" -> libraryCalling, "use_indel_realigner" -> realign, - "use_base_recalibration" -> baseRecalibration), m) + "use_base_recalibration" -> baseRecalibration, + "sv_calling" -> svCalling, + "annotation" -> annotation), m) } @@ -96,6 +100,21 @@ trait ShivaTestTrait extends TestNGSuite with Matchers { pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0) pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) + pipeline.summarySettings.get("annotation") shouldBe Some(annotation) + pipeline.summarySettings.get("sv_calling") shouldBe Some(svCalling) + + pipeline.samples foreach { + case (sampleId, sample) => + sample.summarySettings.get("single_sample_variantcalling") shouldBe Some(sampleCalling) + sample.summarySettings.get("use_indel_realigner") shouldBe Some(realign) + sample.libraries.foreach { + case (libId, lib) => + lib.summarySettings.get("library_variantcalling") shouldBe Some(libraryCalling) + lib.summarySettings.get("use_indel_realigner") shouldBe Some(realign) + lib.summarySettings.get("use_base_recalibration") shouldBe Some(baseRecalibration && dbsnp) + } + } + pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe ( (if (multisampleCalling) 2 else 0) + (if (sampleCalling) numberSamples * 2 else 0) + @@ -125,6 +144,20 @@ class ShivaSampleCallingTest extends ShivaTestTrait { override def baseRecalibrationProvider = Array(false) override def sampleCalling = true } +class ShivaWithSvCallingTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = Array(false) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def svCalling = true +} +class ShivaWithAnnotationTest extends ShivaTestTrait { + override def sample1 = Array(true) + override def sample2 = Array(false) + override def realignProvider = Array(false) + override def baseRecalibrationProvider = Array(false) + override def annotation = true +} object ShivaTest { val outputDir = Files.createTempDir() @@ -170,7 +203,18 @@ object ShivaTest { "wigtobigwig" -> Map("exe" -> "test"), "md5sum" -> Map("exe" -> "test"), "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test") + "tabix" -> Map("exe" -> "test"), + "breakdancerconfig" -> Map("exe" -> "test"), + "breakdancercaller" -> Map("exe" -> "test"), + "pindelconfig" -> Map("exe" -> "test"), + "pindelcaller" -> Map("exe" -> "test"), + "pindelvcf" -> Map("exe" -> "test"), + "clever" -> Map("exe" -> "test"), + "delly" -> Map("exe" -> "test"), + "pysvtools" -> Map( + "exe" -> "test", + "exclusion_regions" -> "test", + "translocations_only" -> false) ) val sample1 = Map( diff --git a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 58dcaf82447daef751eae0d21ac7c43387026149..396154174e18b617ebff84ae3d7ff3704fec336f 100644 --- a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -15,15 +15,20 @@ */ package nl.lumc.sasc.biopet.pipelines.toucan +import java.io.File + +import htsjdk.samtools.reference.FastaSequenceFile import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge } +import nl.lumc.sasc.biopet.extensions.gatk.{ CatVariants, SelectVariants } import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport } import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer } import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor } import nl.lumc.sasc.biopet.utils.VcfUtils import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.utils.intervals.BedRecordList import org.broadinstitute.gatk.queue.QScript /** @@ -35,22 +40,43 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def this() = this(null) @Input(doc = "Input VCF file", shortName = "Input", required = true) - var inputVCF: File = _ + var inputVcf: File = _ @Input(doc = "Input GVCF file", shortName = "gvcf", required = false) var inputGvcf: Option[File] = None - var outputVcf: Option[File] = None + def outputName = inputVcf.getName.stripSuffix(".vcf.gz") + + def outputVcf: File = (gonlVcfFile, exacVcfFile) match { + case (Some(_), Some(_)) => new File(outputDir, s"$outputName.vep.normalized.gonl.exac.vcf.gz") + case (Some(_), _) => new File(outputDir, s"$outputName.vep.normalized.gonl.vcf.gz") + case (_, Some(_)) => new File(outputDir, s"$outputName.vep.normalized.exac.vcf.gz") + case _ => new File(outputDir, s"$outputName.vep.normalized.vcf.gz") + } + + lazy val minScatterGenomeSize: Long = config("min_scatter_genome_size", default = 75000000) + + lazy val enableScatter: Boolean = config("enable_scatter", default = { + val ref = new FastaSequenceFile(referenceFasta(), true) + val refLenght = ref.getSequenceDictionary.getReferenceLength + ref.close() + refLenght > minScatterGenomeSize + }) def sampleInfo: Map[String, Map[String, Any]] = root match { case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags } - case null => VcfUtils.getSampleIds(inputVCF).map(x => x -> Map[String, Any]()).toMap + case null => VcfUtils.getSampleIds(inputVcf).map(x => x -> Map[String, Any]()).toMap case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap case _ => throw new IllegalArgumentException("") } + lazy val gonlVcfFile: Option[File] = config("gonl_vcf") + lazy val exacVcfFile: Option[File] = config("exac_vcf") + lazy val doVarda: Boolean = config("use_varda", default = false) + def init(): Unit = { - inputFiles :+= new InputFile(inputVCF) + require(inputVcf != null, "No Input vcf given") + inputFiles :+= new InputFile(inputVcf) } override def defaults = Map( @@ -58,60 +84,91 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum ) def biopetScript(): Unit = { - val doVarda: Boolean = config("use_varda", default = false) val useVcf: File = if (doVarda) { inputGvcf match { - case Some(s) => varda(inputVCF, s) + case Some(s) => varda(inputVcf, s) case _ => throw new IllegalArgumentException("You have not specified a GVCF file") } - } else inputVCF + } else inputVcf + + if (enableScatter) { + val outputVcfFiles = BedRecordList.fromReference(referenceFasta()) + .scatter(config("bin_size", default = 50000000)) + .allRecords.map { region => + + val chunkName = s"${region.chr}-${region.start}-${region.end}" + val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName) + chunkDir.mkdirs() + val bedFile = new File(chunkDir, chunkName + ".bed") + BedRecordList.fromList(List(region)).writeToFile(bedFile) + bedFile.deleteOnExit() + val sv = new SelectVariants(this) + sv.variant = useVcf + sv.out = new File(chunkDir, chunkName + ".vcf.gz") + sv.intervals :+= bedFile + sv.isIntermediate = true + add(sv) + + runChunk(sv.out, chunkDir, chunkName) + } + + val cv = new CatVariants(this) + cv.variant = outputVcfFiles.toList + cv.outputFile = outputVcf + add(cv) + } else runChunk(useVcf, outputDir, outputName) + + addSummaryJobs() + } + + protected def runChunk(file: File, chunkDir: File, chunkName: String): File = { val vep = new VariantEffectPredictor(this) - vep.input = useVcf - vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf") + vep.input = file + vep.output = new File(chunkDir, chunkName + ".vep.vcf") vep.isIntermediate = true add(vep) addSummarizable(vep, "variant_effect_predictor") val normalizer = new VepNormalizer(this) normalizer.inputVCF = vep.output - normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz") + normalizer.outputVcf = new File(chunkDir, chunkName + ".vep.normalized.vcf.gz") + normalizer.isIntermediate = enableScatter || gonlVcfFile.isDefined || exacVcfFile.isDefined add(normalizer) - // Optional annotation steps, depend is some files existing in the config - val gonlVcfFile: Option[File] = config("gonl_vcf") - val exacVcfFile: Option[File] = config("exac_vcf") - - outputVcf = Some(normalizer.outputVcf) + var outputFile = normalizer.outputVcf gonlVcfFile match { case Some(gonlFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputVcf.getOrElse(new File("")) + vcfWithVcf.input = outputFile vcfWithVcf.secondaryVcf = gonlFile - vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") + vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_gonl", None) + vcfWithVcf.isIntermediate = enableScatter || exacVcfFile.isDefined add(vcfWithVcf) - outputVcf = Some(vcfWithVcf.output) + outputFile = vcfWithVcf.output case _ => } exacVcfFile match { case Some(exacFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputVcf.getOrElse(new File("")) + vcfWithVcf.input = outputFile vcfWithVcf.secondaryVcf = exacFile - vcfWithVcf.output = swapExt(outputDir, outputVcf.getOrElse(new File("")), ".vcf.gz", ".exac.vcf.gz") + vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_exac", None) + vcfWithVcf.isIntermediate = enableScatter add(vcfWithVcf) - outputVcf = Some(vcfWithVcf.output) + outputFile = vcfWithVcf.output case _ => } - addSummaryJobs() + outputFile } /** * Performs the varda import and activate for one sample + * * @param sampleID the sampleID to be used * @param inputVcf the input VCF * @param gVCF the gVCF for coverage @@ -144,8 +201,8 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum add(bgzippedBed) val singleVcf = new BcftoolsView(this) - singleVcf.input = inputVCF - singleVcf.output = swapExt(outputDir, inputVCF, ".vcf.gz", s""".$sampleID.vcf.gz""") + singleVcf.input = inputVcf + singleVcf.output = swapExt(outputDir, inputVcf, ".vcf.gz", s""".$sampleID.vcf.gz""") singleVcf.samples = List(sampleID) singleVcf.minAC = Some(1) singleVcf.isIntermediate = true @@ -182,6 +239,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum /** * Perform varda analysis + * * @param vcf input vcf * @param gVcf The gVCF to be used for coverage calculations * @return return vcf @@ -225,7 +283,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def summaryFile = new File(outputDir, "Toucan.summary.json") - def summaryFiles = Map() + def summaryFiles = Map("input_vcf" -> inputVcf, "outputVcf" -> outputVcf) def summarySettings = Map() } diff --git a/toucan/src/test/resources/chrQ2.vcf.gz b/toucan/src/test/resources/chrQ2.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..94a70075ea0258cf6132cf9a7a7dfedb18542fac Binary files /dev/null and b/toucan/src/test/resources/chrQ2.vcf.gz differ diff --git a/toucan/src/test/resources/fake_chrQ.dict b/toucan/src/test/resources/fake_chrQ.dict new file mode 100644 index 0000000000000000000000000000000000000000..e2b0e2af7579a994afedb68a5c495ba794a445df --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.dict @@ -0,0 +1,2 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:chrQ LN:16571 M5:94445ec460a68206ae9781f71697d3db UR:file:/home/ahbbollen/fake_chrQ.fa diff --git a/toucan/src/test/resources/fake_chrQ.fa b/toucan/src/test/resources/fake_chrQ.fa new file mode 100644 index 0000000000000000000000000000000000000000..171b3737bd458bba03d7c457b6ba51e5ef4f774f --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.fa @@ -0,0 +1,2 @@ +>chrQ +TCGGCATCTAGAAACTCCAAGTCTGCAGATCTATAACACACGAGGGTATTCAGCCCTAAAGCATTGCGTCTAGGCATGGGTCTTTAATCTCGCCCCGGCAACCTCTACGCGAGCTCCTACCAGTCAACGTGATTGATCCCTCGTACCTAGCATATTCCACGGCGTTTCATAGCCTTGACGGGCACCTTATCCATTTTTACCTAGTTTGACATTAATTGCTTCAAGACTACTGCGGTACGGGTCCCTGCATCCAGAAAGGATGACAAAAAGTGTGACAGTGCGTTCAATCCATAGATTCCCGTCGGTGCGAGGGCTGAAGTTGAGCCCTACAACCCGGCGATAAAAATTGTAACCCTATGCATTGCGTCAAGTTAAGACCCTCCATCTTCGCCACGCGATAGGAATTTGAAACATCGGCGTGCTTAGAAAGGCTATTAGCGCGGGTGGCCTGATCGGTATCCGATAAAGGCGTTGCCACAAATTTTGCCGATTGACCTGCAATTACATCGGGTAAGTGCATTTCCGCCGTTAGAGTATTAAGTCATGACGTTTTCGACATGGGCATTTAATTGGCCCAAGGTGGAATATTCCTGATGTGCCTAAGTATGAGCTCCCCATGAGAATTTGAAGATCATGAAGACTTGAGTCAATTCTAAATGTAGTTACCATCACTTAAGTTCGTTCGCCTTCTGGGCAGGCTATGTCTAACGAGCGACGTCCGCAGTGTGACAATCTAGATATTGGTTGAGCAGGAATACCAAGGAGGTGCTTGAAGTTTCTCTTATGGAACCAACTTCAATCAATAGGTAGTCTCCGTTTCGTTTCCACTGAGACATTTGTGCAGTGGCACAGTATGTGGTCGTAAGCGCATGGTGTTGTTGGAACGAAGACTCTCACTTTTGTTTCCTTTGGTAGTTTAATTTAGCAGTATCCTGGTCGTTGACCAATTTGTGAATCTCCACGGTCTCTCTTTATCAGTCATACCTCTTAAACCGGCTTCCGCTCCTGCGCACATTTATATCCTACTTTCCGTCAATGTGAAAGAGAGATCAACATAATTTCGCGCACGATCTGTCCCATTTCAGACGCCATCGACGGGTCCCGCCTTCACAAAAAACCTGCCACCATGCAGTAGGTACCTCCATATGCTGAGCGTCCGTTACCGGAAGGGTTAAGTCACTGTTTAAAGGATAAGGCGAGGTTTCCCTTGGTGGTGTACTACTGCTCACGTTCCTGCTTTACTCTCCGAGTATTGTTTTAAAAGTGGGCACGCTGGAACGCAGCACCCTATAAGAAGACTCAGTTAGGTCCCCTTCACGAGCATGTTTGAGCTCTCGGACCTAAGACGTCCGAACTCGGTTACGACGGTTAAAGACAAAGCTCCCAGCATGGTATCAGAAGCCACCACGCGTGAGGATGGCGGGGCCCTGGGCACATTGGCCCCGAATTCTTCGCCCACTTAAGTCGGACTCACCACGGGAGACTACGCCCTAACCGGATAATATCTTTTAGATACCATACAGGGTGCAACCATCGCGGCGTCAGCTGAATGTGAGGACAGAGCCCCACACACAGCACTAACAGATCCATGATTTTACTTCGTTTGGGCGACGTGCGAGCCTATTCGGCCTGTCGGACTTTGTGTAGATTCGGGATTTGACCTAGCGTATAGGCCTTTGGTATACTCGAATTATCAGGGTCAAACTACCCTCAAGGAATCCTATTTCAGACGGACGTTTGCCACCCCGGAACGTTGTCGGATCGCTTCTTGCCGCAGGAGATTCATGGAGTGATAATCGCTGGACTCACAAGTGTCAGCGGACTTTCGGTGTCTTGTGGCCTACTTGCAGTGAACACCACCAAACGAGTAGTATTATGGATTGCCGGCGTGTGTTTGTGGCCATGATTGGTTGATGCGACGGCCTAGATTCTCCTACGGGAATCTACCAGGCCCAAAAGCGCTGACTTTTATGTATTTGAGGGGCCGAAATTACATAGTAACCCAGAACAAATACCCGTTAGTTATAAAGTGAGCGCATAAGTTTGGTCGATCCGGCAGTCGAACCATTGCGGTCGGACATATCCGCAGTAGTACACTAAGGCGGAATAGACTGCCGAGTCAACGCTCCCTCATTCTTGCTACCTTAGATCTCGCAGGTTCGACCATTGCTGAAGCCGCTGAATTACACGAGTTGTTTTGTTAACCCCCGGAATGTAGTTCGTACGCCTCAACTGATTCTTCAAAAGCTCACTGCACGTGACTTGTCATGTGTTCCTAAAACATACCTCATCTGTGGGTCTGGTCCCATAAGCATGGAATGTCCGTCGACGCAACATGGAAACCCACTCGCTCGCTATACGTTTATGGTGAGACAGAAACACACTGTATTAGACTGCCACTGATAGCCCCAGTAGCAAGGTGATGTGGCAGGCATGGTACCCAAACGTCTATCGTTTTGGGAACCAAGGGGAGTGCTAATAGGTCCGGCCACGTAGAATGACATAACCTCCAGAGGAGCGCAGGAGTTGATGCATTAAAAGATCCATACTAAACGTTAGCTTAATGCCTTCAAGGCACCAGCTACCTCCATGACAAGGAGATTTCGGAAGGGGTAAGATTTACTTCTGTCCCAAAAGGGTAATGACCCGTAGGGATGGAATCATTGATGAACTCTAAAGGGACTCAGCCGACTAGCCGAGAGGGCTGGACGATCATTTGATGGGAGAATACGCATACATCTAAGTGTCAAGTATTAAATCGGTAATCTCCGTAAAGGCGTGAAGTTCACAGGGCGCAGTTTCCAGTTACTCCAAGAAACTACCGGTTCAGTTATCGCTTCCGGTGCCTTCACAGCAAACATCATTCGCTCAAGAAAGTGCTGTCGCTGCGTGTGGATATTTCCCCCTCGTTCTGGAAAGTGCTCTCAGCGACGCACGTAAACATGCTGGCGACGAGAGAGGCGTCAACACGGTCCGTTACCAAACTGCGGCATTTACCACGAACCTGATTGCAAAGTGAGATTTCCGTAAGGAGGTTAGCCAAATATTACGTAGAGTGTTCCACACCAAATCCGTCGTCCACATTCGCGACGGCAGTCTAGACGTGTAATTCCCCGGATAATCCAGTTACTACATGCTGATGCAGTCATAGTGCACGCAAATGCGCAACTTAACAAGCACGACCTGAAACAGAGAACCCCTGTGTAGTCAATATAGGATGACGGACACACACACTTGCTGCTGCAATCTTACATTCTGCGAACGAGTGCAAAGTTGAAATCATGACGAACAGCCTTGCTTTTCAGAGTCTCTATCGAACTCCTTTACACCTCCATATCTACTTGCAAATCACACTAGAGGGGCGCAGCTTACTCACTGAGAGATGGTCTACCTAATCGATTTTCGGTGAACTTTGAGTACAGCATTGAGTCTGGAGGGTTCCACTACTTTATCGTACCGGTCCGACATGATTTCTTATCGAATAGATGTTGAGATGGACATTAATAAGCATAGTACGTCTCGATCGATGGCTACCTTTACGTCTATGAGTGCTTACATAAGGTCTCTCGTAAGTCATGGTCCCGCGGGGCTCGCGCAACATTGTGGATTAATGACTCCAGTGACGCATGTTCGATTCGCATGAAGTAGGTGGCGCGTATTCATACATGAATAGTAGGCAGAACGAGCACATTGGACCGATCTTGGAGGTTGGGCTTGAGGTCCCGCACTGATAGTTTACGGCCATGAAGACGACAATTGTCAATACTTCTCTATCCTGAGCGAATGCTGACGGGGGCCAGGCGGAAAAGTGCGACACAGTCTACTGATTACGTGTAGTACGTGGCTAAGCATATTCCCGGGTTCGCAAACATAGGTCTCTGATGGGGTATGGGTAAGAAATCTGAAGGTTGCGTCTCTCACCACGGTCAGGATACCGAATCAGCTCATAAGAGTTACAAACGCGCAAATGAAGGCCTAGTCCACAGGGGTGTCATTCGCACGAGCTGGGCTTAGAATCACTGTTTTCGGTCGCACCTGTAGGGGGACATGGGACGGATTTCACAACAAAAAAGCATCTCTCATGATTCGATTCAGATAGAGGAGAGGAGGTAAATGCCAACAAATCTATTCTTTAGTACCGCCTGACGGAACGTATTTAATCCTCGCCTCAGATCGACACCGCAGGGTAGCTGAAGACGTCCGTTCTTAGACATTTAGTCGATCATTTGTTATGAAACAGAACTAGGAGTCCGTGCCCTTCAGGCCGGCTCAGGGGCACCTACCTCCAGATCGCCCAGGTTGGGTTTATTAGGCGCCGAAAAGTTACTGCCCTATCAGCCCCTCCAATCCGACCTACGGACATCATCCCACTGGCTCGCAAAATATAAATTGCGGATGGGAAAGGATAAGGAAAATCATTACCTACACAGAAGGACAATGTCAGTTCCAAATAACACTGATACTTTCGGAGCAACTTGGTCCGGAAATGTAAGTACGACTATAGCCCTTTCGACCAACGCCGACAGTCCTATTTGGACGCCGAGAGAGGCGACGGGTAGCCGAATGTAAAGCTCTCGGGTCGCTCTTGGCGGAATGCGCTGCGGGTCCTACCCTAAACCCTTACCACCACCAACTTCGTTAGGAGCCGTATAGATTACAGCTCCCGCAAAATTAGAGAGGAATCTGAGTTATTAGCTGAGGACCCCGCATTTTCTGCGACGGCGTAGCTGCAGTGACGTACGATATGAGTTCCCGACTGTGAGGGAGTCCCAGTCGTGACTCCCTACAACGGCTCCAGATATTGTTACTTATGGTCAATATGCCCCGACCGCCCATTGTCTCGAGTACAGTCTTCCCCAAAGTTAAGCTGTGCATTACCTTACCGTTTTAGGTCCAGCTGGTAGCACCGAATGCTGCGCAATCCGAGCCCCCGAAATAGACTACGTGTCCACGGTCAATTGTCATGGGTAGCAGAGCTCAAAGAGGAGAAACGTGCCCCGTAAACCTATTAGATCTCGGTTGATAAATATCAGGCCACAGCAGGCTGCCCGATGCTTGTTTGAACAACAACTTCGGGAGCCGCGGTCCTTGGTTCTCCCGATATTCGGCCGCACCGAACGGTACGCGTCATCGCGAGGTGCGTTCTCGCAGCAAGAAATATTTGTTGTTGTTGTCTTCCTTCCGCATAGGAAACCTTAAGCGGTACCTTTCTACGAAGTTGAACCCTAGAAGCACGTGTAACAATTTTTTTTACGCTACACCCGGATCTGCTTCCATCTGTTGATCATATGAGCCTAATGTGACTAATCTGTGCCGTCGATTGAAAATTCGTTCTGAACCTAATCACATGAATTAAAATTAGGGCGAGAATTGGCTCCTTTTGGGCCGTAATCCTTCAAAGGGTTAACCGAATTTAGCCTCCACGGTGACACAAACTCCCATAGGTAAGGCAAACCCAATAACGAGGAAGCCTTGCCCACAGCATGTTTGATAAATACCCTTAGGGTAATATCGCGTGCAATACTGAAGCCGCTCTTCTAGCATCCGTGTTTGACATACTATGACCTTGAAGCCTGCCGCAGCTTCTAGGTCATCCAAGTAGATCAAAACGCCATGTTGTGGATCCATGCATCTTCCCAGTGAACATGGATCTTAGTGTGACAGGCGAGGAGCGGCGAACACTATCGGTGTGGCAAGCTCGGGCCTTCGTACGTTGTGGAAGTATGCGAATAAGGAGACCGTAATGTATCAAGTTCTTAAGAGCCTTGGTACCGTTGCAATTCGGCATGTTCCTACAGAGACACTCCGTGTTTGTCATCCGTCATAGATCTATGGCGTAGTTAGCGCCTCTGAAGTAGTTGTCCATTCAGCAGGCATTGCTTAGGGAGTTTCTGGCGCTTGCCGCTCAAGATGCTCACGGGCCTAAGTAGCACGGCAACCTTTTGACAAAGCATTTTATAAACTGAGCATATTGGCCCGAAACTAATCCAGCAAAGGGTGAAGACCTGTCAGCGGGCCCAGAGTGTGAACGGTCTACTGCGCGGTACATAAGTGGCGTAATCCATCAACAAGACCTACACGACCTGAATGATTTCCAACAACTTTATATGCTTTTCCGCATCTCGAGAGTACCGGAATCTATGCAATCTCCCAAGGATCCGTAGATTTGAAATTCAATCCGACGGGGTAAGGTTGCCGCGCCGGTTAGCTAATGTGCGGATTTATAGTCTTTTTCCCAGAAGGCGTAGTTAGTTTCGCACCTAACTACGACACATACTTGGGTCGACTGTTGAAGGTGGTAAGTTGCGAGCAGTCCGCCGCTCTCACGCGCCGAACCACGTTCATATCGGCAAAGTTGCGCGATGACCTATAGGTGTGCAAAGCTCGTCCGACATTGGGATTGGATTCACGTACATACGTTAGTATCATGGGTAAGCTTCCATGTCAGCCTCGTGTATAGCACCGGTGCGCCGCGCGTTAAGGATTCTATGCCCAGCAAATGTGCCAACGTTGTGGGGAGAAAAGTGTAGTTGGATGCGATCGTGACATCGGCACACCGAAACTCTGCAGCCAGTCCCGCTAATCTCATTGGCACCGGGTAAGAGATTACCTTTGGTTAGGAATCGCGTGCGACGTACTGCACGAAAACAGTGCCTGAACCGAGGTGTTTACTTAGATGGTTCTAGACCCAGCATGTTCCTCACTGGAACCTGACGTCGGTACGTGATCCTCTATACCTCCTTTTCGGTATTGGCCTGGCAGCTACTCTAACTGTTTGGGCCGCGCCGATTTCTCGAGTCCACACGGCGAGGTCAGCAAAATTGCCAGTTAGTGGATGTTGGGATCTCAACGCATTACCATGAGAGTTCTTGGTTTACCCGTTAACATCGCTGCGCACGGTGTGAAAAGCCTGTTTCTTTGGCCCCCATCATCTTCGGCCCGCAGATCTCAGATCAATGATGTAAGGTTGCGGCGGCAAAGACTAGACTTGAGTCGTGAGATGGTGCTTTGCTGAGGCCGTCTCCTATAGCTTATTCTAGGACTTTCCGCAAACCACCCGACGTGCGGCTGTCCACGATCGGATTCCATTCTGTCTCGGAGCATACAGCACTAGATTTGCCGCTTGAAAAATGTTCCATAACCATGATTTCAACCCCATCTAGTCGGCAGGCACAGCTGAGAACAGCGAAGGGCGTCGTGAAGGGCATTGCCCGTAGTGTTTCAGACGTGCTAGAGACTAAATCAACTATCTGCACTCGTAGCCTGGCGTGTGAGATGTCACCACGATGTGCCTAGAGGAGTGATTATGAACATGTATTACCACGTCCGGGTGTCGACGGCTATATGGCTAACATTTCTTATGGCTAGACGTGCTTGGAAAGGTTCCCCAGCCTTCTGTTTCCCGGTGCTTTCCACGAGTCTGGAGTTCTGGTAATTAACTACATGGCGTTAACGCGGAGGTAACCCCCAGTCATTGCATTGCAGGTAGGGCTTAGGTGCAATATAATTCACCAAGGCGCGGATTCCTCACGATTGTTACGAAGACACCCGGAGGGTTTCAGTATGGCTTGAGAAGTGTACGTTTTTCCGGCCAGGGTGTAACTATAACCAACACATGTTTGGCCACGGGCTAAGTCGGTCCGCACGACTGATTTCCCCCGCCCATGTGTTTGGGAGCAATAAACTGCGTCTGCCAAGAGTAACAACTCGAGTAGAGAAGGGAAGTCTCAGACTATTTTGCAAATCAGACTGTAAGGCTCAACAGCCATACAGCTTGCCCTACTACTGAATACTAGCGTAGCGTGGCCACATAGGAAAGACTTCATGTCTTCTAATAACCTTTTACCTCCAACGTCCCCGCCGTCTTCACGCGGTCCAACGATGAGGAAACAACCACCCCTATCTTCCGCGGAGTGGTTCACACGACCCCCGGCGTTAACGCGCACGTTGTTGTCTTTCGGGACGGCACTACCCCCAAATGCCCAGACCCAGTGCTAGCGATATTCAAACGCCGTCCGGTAAGTCCTGACGTTTTTCAACTGGATGCACTGGCGACACGTAGTTCGCAAGGCGTCCATGAGAGGTTTTAACCGTCATGTTTCCGTATCACGTCTTATGTCTGTCTCTATTCTCAGCGAAATTCTCATCATAGGGCGGAGACTATCTGAAGGCCAGCGAATACAAGATTTAATATCAAATATAGCATGGGGGCCAACAGAGGCCCCCCTGGTGCTGACGAATTATCGTGATATTAGTACAGCTGTCTGCAATGCCATTTCGAAGGCTTTTTGTTCGTATCACTGCTCTATGCATAGCGGTCACTATGACCTCTCAGCTTGACTCACCCGAATGACCAATTGTGGTCCAGCACTCCCTCATCTTCCCCCATTAACGATACGTTGGGCACCATCGGTGTGAGCTACCCGTTACAGTCATAGAATCGTTCTTTGCGTTGTACGCGGCACGGAGGTGACCGGGAAAAGCGCCGCGAAGGCCCCGCACTGAATAAAGCTAGTATTAGCGTCTGTCAAAGTGTTTTGACACCTAATTCGCTTCCAAGTCCCAATATCTAATCTAGCCTGCTTTGGGCCAACATCTCATTGCGTTATGCTAATGAAGAGGGTGCGGGATCACATCCGCTCTTCTCTTCCTATACACAGCGGACATTCGGGTTGGACGTTTGGAGTGATAATTTATCGTTAGGGATAAGTATGTCGGCGCTTAGTAGTATAGCCCGCTGACCAGCGTTCGATTTCGAACCTTACTGGACATTCTCAATAACTACTGATCATGACGTTTTCCTCAGTTCCTAGCCTTGACAACTAGCCACAGTCAGCATGGTAGAGAGCGTTGAGCCGGGGATAGCCAGGCTATTAAGACAAAGACCCTCGGGCCCCTTAATGCGCGTCAAGTCTGACGGTTTGAGTGCGGAGCAGTAAGCGCTTTGGTATAACCGTGACGTAGCAGATCCATGCTTCGCCCGCTTCCACCTGAGAGATACTAGCCTCTTTCGCACTTTGTAGGATTACGGGCAGCGAAATATTTATCCTGTGCGGCGAGCCCGCTTCGGTTTCGAGCTCTATCAGTGCGCGGTTGGCACTCCAACGCACGATAACATATACCCGCCCACAAGGCCATGCAGGTTTAACCTCCTATTCTGATTGTACCTGGCTGACTTTACGGTACCCACCAGCGCAGGATTAATAGCCTAATTATGCTAACCGGTGCTAGTCTAACTGCTGTTACTAGTCCGCCCCAGCTACCCCACGGGTCAGTAACTGCACCAGCAAGCATGGTTCTCCTCCTGAAGTTGTACGTTCGAGAACCCCGTATCGAGTTGGTATATAAATTAAGGGTTGTCTAAAACAGAAGCCTATTCCGCTATCATCGGTGTAATAACTGATCGCGCCGTGGTTAAATGGAGGAGCACCCGCATGGATACATCGCTAGCGTCTTGTAACTCTCTGGGGGCCTAGTATGGAACGGAACAATGACATCATTGCTTACGGGGCCCGCACTTAGCTGTCGCGTATCGCAAATCATATGGCATGTCAGTCCCGACATCACGAAAATGACCCCATCTGAGGTGGTCGGGAGGCGAACAGTCGAATATGATGTATGCACCCGCAACTTAATGTTCAAAGGCGGGCGAAATGCCTTCTCCCGTCCGGACTATCCTGAGTGCTAGCCGCGAGTCTGTAAAGGTTGACGCAACCATATAGCACGCAGAAAAATCACTCTCACACCATGAGAACCATGGCGGCACGCTGTCTACTTTGTCTGACAGGCTACGGAAGGAATGGTACATACGTACAAACGGATGATATGATATCGGTCATTGCCTATTGTGACGCTACCCTACTGCATCACCCCCTTAGAATGCGTTGGACGCTCTATAGCAGATCCTCCATCCAGTGGAAGTCTCGTCGCCGTGGTTTGCCTTAACGACCGTTGGAGAGAGCAGGACAGAAATATCGCCCTTTTGAGCGCATTATTTGGAATCGAGGTAAGTCAGTGCGGCATAATCGCGCCTCGTGAGCGGAACAGTTTTTGATCCCACCCGCTAAATGCCAAGGTGCTGTAACCTGGGCGCGACACCAAAAGACCACGTGCTGTATGAAGCATGTGTTCTAGCGCACTCTCAACCGTTACCCCGAGAGTAAAATGTTAGTTGTAGGCCGATTCTGCAATGGTAATTGGCGGAGTGTCTAGGGAAATGTTTCGGTCATACTTAACCGGCTACCTCTTCCTCCCTCAGATTCGGTCTGAGATGAGATATACTGGGTGAGTTGAGTCGCCCTGTATCGTTGCGGCGCTCGTGGACCAGACAGACAGTTCCCGTTTATCTCTGCTTCTAGATGGAGGGTCGCCTCCGTGTTAACGCCGGCGAAGGTAGTCGCAGCTGAAGTTGTGATGCACAATCAGGTGAGCCTTTTAAGTATGGTCCTACGGACGTGAACAGCTGGGCCCAGTCATTTAGTACGGGGGGTTTACCTATAAGGATACGGTAAGAACGTCATCTATCCGTCCCACTGGAGTCCGAGGGGTTCGTGTCTACACGGATTACTTATCATGCACACACGTCTACGGTCATGCATAAAGTTGTGCAGCGCAGCAATCGGAGCGGAGTTACACCATCTCCCTATTAACAAGGCACTTATTAGTACTTACCCCGTTATAGAGCTCTCATCTTATCGATAGAGCGCAGTCCTAAGTATTGGCTCGAGTGATTCGCTCCTCAGCCCTTGATTGTAACTCCCCCGATTGCAGGTTGTATGGTGAGTAAAATCTCTGCGCCCTTCTGTTCGGATAAAGAACCCCGACCACTAATGCCCGCCTGCTTGTTGGGCGGTAAATGGGTAACGGAACATGGACTATGAGTGCGATGATGGTCAATAGAATTACCTTATTACGCAGTAAAAGGAATGACGCAGACAGGTATTTGTCGACGATTGCTTCGAACCTGGCAAAATGGGGAGGTATCCTGTCATGTTCATCTGTAAAACAACTCCTGCCTCTTCGTAGAGGACACACACTGTGGGCCTTTAGCCTTTAGCAGCCCATTGGGGCTTACCAGCTGTCGTCATGGGGTATCATTAAGATCCATGCGCCCCCGAAACTTACTGCAAAACAATATGGCTTAAAGGTAAAGGGACCATCAGGAGAATGCTTAAGAGCGACATATAGATACGTATTTAATTAATTTATGTTAACGCAACCATCTCGCAGGAGTCGCATAGCATATTGCCGGGTGATAGTTAATGCACTGTGCTTCCGTGTTTATATAAAATAAGCAGTAACCTCTGACAGGTTGAGACTCCAACAAGTGCTCCGGGTATTTACCTTCTACCATGGCGTTCTAATATCACGAAAGAGAAATTGTGTGTACCGATGCCAGGTGACCGCCCGCGTGCGCCAACGACGCAATCTAGAGCATCCACGCTGAATTGGGGAACTCTTGCCGTTCGTCGCATGGTGTACTTGGTACCACTCGATATGCCTGATTAGGTTTGGCCGTAGCACGTAAGGTAGTGACTTTCCATTCAAGCTAGCGAAGCGACACCACCACAGTGCCCGGTCAAAATAACCCACACCTGGCCAGCATAGAGGCTAAAATAGCTACAGTGCGCTAATCGAGTGTTTTTGCATCGGCTCGTGGCTGGTGGACTCGGGACAGCTTAGAACTAACTCTGGTGTACAAACGCGATCGTAGCTCTCGCGACTTACTCACCGGAGTAGGTTAGATGGACAAGACCTAACCCGAAGCCTAAATCGCCCTGAGTGTTAGCCGCCATTCAATTCTATGGTTTATCGGGGGCGTCTATGGCTGCGACAGTATGGAGGCCCGTTATGGGCACCCGAGTATCGTACCATAGTAATCCCATATTCCTCTTCGAGCGACTATTGGATCAACATACCTACAGGGTAGTATGAATGTTCTTGATTACAGAAACCATGGAATCGGCGCATTCTATGTTTCACTTCCGAATAACAGTGAGCAAGGCATGCCCTTGACAAGGATCATCCCGACAGCAAGCCGATCGGGCCCTAGAGCCCGACCCCCAAACAGAACACCGGCCACGTAGTTGCTGGGACTAAACAAAGGTGTGTTTCCATAAAAGGAAATCTTCAAGTGTATTGTTGAGTCGTAACGCTTATATTTATGGCCCAATGGGCGTTGCGAGCACAGTAGCAGGCCTAGATGAATGCCTAGGCCACGATCGGGGGGAGGCTCATTGAACGTACTGCCATACCAAGCCCCCGTATGCTATGGCAGGAGGGGTTCTCTTCGTATAGAGCGAGGGTCTCTACGCCAAGCAGCATTCCCGTGTTGGGTGGCCAATGGGGCTCACTAGAAACTCGGTTTTTTTAGCGAAGGAATGAGCAAACTCGTGAAAGGTGGTACACACCAGTTGCGGCCGATTTGTTGTAGCAACAAGGTTTGAAGAATTGAGTAGATGGGCCAATTTACCTCCTATTTAGCGAGTGAGATGGCGCATGTTTATTCAGACTCCATGTGGGGTAGAGGCTAATCGTTTAGTAGCAATAACCCCGCGGGGCAAGAGACCGTAATAACTTGAATCTGTGGTAGCTATGAATATGTGCTTCGCCCTAAGTGTTATGTAACAAGAGTGATCCAGGGGCTCAGATCACACTTAGTACGATCCGCTACTGAAATGCGGCCGCGGGCTTGCACGCTGGACATAAGTCGGATAATCAATTGCCTACGACAGGTTCAGCCATAAGGCTTGGCTCCTAACACACTCATGATGTCTGGCTTTTACTCGTGCCCGGACATAAACGTATGCTCAAACGCGAGACAGGGGAGGGTCAGCACCGTTTAGATCTATAAGGCCTACCGGTAATATGGATCGACAACAAACAGATGCTATAGGGATACCTACTCCTTTGGACCCACATGTAGATGAAGGCAAACACGCAGAGCAAAGGAGAGTAGTCCACCCGGTATAAGTTTGTGCTTTGAATTCTGGCTACGCAGACTTGCACTCTGTCCCGGCATTCACTATACTTCTCCGGAAGTCCTTTAAGAAATGTCCGCGCTCATGTGGTTCCCGTTGCTCAGGGGCCAACTCAAGTAGATCTTTAAGGCGCAGTCGACCACAGGCTACTAGATACGAGTTATACTTATCCGGACATCTGGCTAAATACTTGGATACGATACTTCCCCAGTCGTGAGAACGAAGCTAATACAGATCGAATTTCGATGGTTCAGGCAGGCAGTTCTCAGGAGGCAAGGTGTTAAATAGTTTCGGAGGCTCTTTCGTACGATCAGGGTCTACTACCCTAGGGCATTTTGACTTTGGATTAAATATGCAAAATGCAAGGCCGATTGTGATCAGTACTGATACTCCAACTGGACCACCTTCAGACCCTTCGAGGGGACCTAGACGACGGGAACCCTTCCAGCGGGTGATACCAGTTAGAGCAAGTCACAAACACGATTCAGCCCCCGGGGTTTATGACGTACCATGCGAGTAATAATGCACGTATACGGAGCTCTTCCACCGAGCGATGGCATTTCGGGGCGAGGTAGTTGTCTTTCATTGGCATCGCACAACCCCCATCCTCTTAATTGGCATCGTCTCCAGCTGGAAAGAATTTGAGTGAGCATGTCGCCCCTATTATTCCGTTGCCAATAAAGTGTCTCAACTTTTGGCGAAGGTTTTAACGCATACAAGGAGAAGCCGCGAGACGTCTGTACCGCTGATCTGGACGCAAAGTGCTCGGACTGCCGCTGAGTTATCCTGGACGCCATGATTAGAGCCGTCGTCACTACCTGCATACATGGGCCGATAGAGTACTGCAACCAACAACTCACTTAAGCTCCACAACGGCTGGACACTTCCGAGAGCGGTCTTACACAAACGTTAGGTCCTGGGCCGCCGACCTTACCGCTAGTTAGTGAGAGCCAGTTAAAATTATGAACGCTCGGAACCTTCCCAACAGTGGCCGCAGCCTTCCTTGACGCCTAGCACATCTGGTTTATACTCGGGTATGCCGTAGATCGGTAACCTAGGGAACGACCCTGTGGGTTTAACACCCGAGTGCGTAATCAAGCCTAGAGGCCATCTCAACTCGAGAGGTCTCCTGACAAAGAGGCGCCCGATGAATCATCCAGAGGCGTCTGGCGGTCCTACGAGAGTGGCTTTGGATGCCTGCCCCTTGGATGGATCTGTCTTTAATCGGCGCCAATACCTAGCACTGCTAGGCTCCAGACTGTGTTTACATGCCGTAACCCTGATACTCGCAGAAACGTTGCTGGAAATTCCTAGCAGCTGAAACCATTCCCCGTAACGTACTAGTACGCTAAGAGAGAGTCTCTCCTGGCCCTGATGAGTGTGTTCTCATCTGGGGCACGATACAAGAATCGGAACGAACGCAATGCCGAAGTCCCTTGTACCTTAATTTGGGCGACGCAGATAGACCCAAAGATCGCGGACTACGGAAACTAGCATAGGACCTGTGTCGAGAAGGTTCGAGCAGGTAGTGACACGCAGCGCGGTGGCCGGCGGGGTGGCACATTGCGGGTCAATACTGGTAGTAGCCACTCTTTGGACATAGCGGCGGACCAGCGCCTAGAATGTCTCATTCTCATTTTGTTCCGTGGCACGTTACGTAATGACGGCCCGCCAGCACCTGTGTATGGACTTGTAGCTCGGGCCTCTGGTCCTGGCACGACAAGGCACCAGCCAGTAATCTCTCCTAAGGCGCTAGCGTGCATAGCGCGTCTGCCTACCGCCAGAGAACGCGTCATCTGCAAGACGTCCCAGCGTAGTGAATTGTAACTGCAAGCGTTCTCTTACGGTCATAGTGCCGATTTTGAGCAGTAATGGAAGCAGCAAAATGCCGCCCAAGCGATTCGCAAACTTCTAACAGAGCTACAGCCGGACACGACGCGGTGGTGCTCGCGGTTGGTGATCTTATGATATTAACGCCCATAGCGGCCATCTTAATCGACACCATGTTCGTTTTGGCAGGCCTTGTGGTAAACACGTGCTAGTGGCACCACCCATGCCCGTGCCCATACATCCAAACCGAGAGAAAGCCTATTTAAGCGAAAACCACAACTTCGAGGTTTCACCCCCTGCCATTGATAAAGCGAGGAGTACCCCCGATGCCGGGAAGCGTCCGCACCCATTTCTTTCGTTCTGGAATCCTCGGGCGACTTCTCGAAGATACTGTGCTCACGACCTGGAGTATCATGAACAATCGGAGGAAAATGAGTAATTGTCGAGTCGTTGTTAGACGGCACTTCCGTCCGGCCCAACTGTTCTCGGATACGTGTCCCGTGGTCAATGCTCTAAACCGGCTGCCGGCGACTCAGTTCACTGAGACAAATTCTGATGCTTTCGAAGCAAGGATGCGCCCAGAGCAGAGCTGCCCAGATGAGGTTAAGAACGTAACTATAATCGATCAGCCATTCGGCTTAAGGGGCCCCGGCGAAACGCGAAACACTTGGCACATGGACGCTTCACGCGCAACAGTAGTTGTCTCTTTCGTGAGCCACCGTAGCAGCTAGAAAGGCCTATCCAGTGATGCTTTATGACTGAGTGTCGAATCTAGGTATAGCATAGACTGGCTGATCGGGCGGGTCGGCCCACCCGTCTCGGTCGAGCGGTTCTGACTTTGGGTGGCTGTGTGAACCCAACTGCAGATGGAGTTGAATGGGTACACCCTATGCGAGGCCTCGTCTTTACACCAAATCGGGGCCCTGTGAAGTGCCACTCTTTTCCAGCCGGCAGCCGCTCAGTCTGATTTTGCTTGTACATGTCGTGTGCGAACGTTCCGGGAGGCTTCCGTGTTCCAAATACCGTGTTCTCATATTCGGTCCATCTACCGACGGAGAGTTGGGATGCCCGGGCCCGGAAATATAATTTAAACTCGTGGCCAAGAATTTAGCATGTTGTAAACATGAGAGACAGGGCCGGGCTAAAACATTACCCCTGAGTAATGTAGAGCCACAACTGAACATAACATTGGGATCTAACGCACGCAATCAGTGTAGCTTCAGCCCACCCTCTAAATTTCCCCCGGACAACTGGATTATCACCTGCGTCACGCGATAATTGCTCGCATCTCACCAACACACTTCGACAAATCTGGAGTCTCCCTGGTCCGTACGTCCAAAACCGTTTAAATGGGCGGGTGTGTCGTGAACCAATCTCCTCTTCCATTTGTCACATACTGGCGATGACATCCTTTTACTTGAATTATTCATCCGGGCACCAGCCGCTTTCCCTACGATCCCCGACACTCGGGGCTTCGGGAGTTGCCCGCCAAAAAACCGACAAACCAAACTATACAATCAATCCCATCTAGATGTAGGGGACTGAGGCTCTAAGCTATGCGCCTACTATACTTTGTAGGTATCAAACTACGCTTGAAGATAGTTGATAAGGAAGCGAATTGATCGAGTACCGTATCTTCAGTCCGACTCCCGTTCGAACGCAGCACGCTAACATGGTCCACTGGCATTCTTACTAAATACCTAGTTCACTTCTACATGAGGAGTGTCTGGGCCGGACTCACCTTTGATTAGATAACTGAAG diff --git a/toucan/src/test/resources/fake_chrQ.fa.fai b/toucan/src/test/resources/fake_chrQ.fa.fai new file mode 100644 index 0000000000000000000000000000000000000000..b7a558fdb3b3c0e85f6e3c634cc3ae80c601336d --- /dev/null +++ b/toucan/src/test/resources/fake_chrQ.fa.fai @@ -0,0 +1 @@ +chrQ 16571 6 16571 16572 diff --git a/toucan/src/test/resources/log4j.properties b/toucan/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/toucan/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala b/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..25c48f8f3b696d1709ba1bfd9ecb15b142a6a0fe --- /dev/null +++ b/toucan/src/test/scala/nl/lumc/sasc/biopet/pipelines/toucan/ToucanTest.scala @@ -0,0 +1,111 @@ +package nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File +import java.nio.file.Paths + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor +import nl.lumc.sasc.biopet.extensions.tools.VcfWithVcf +import nl.lumc.sasc.biopet.utils.config.Config +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvan_thof on 4/11/16. + */ +class ToucanTest extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Toucan = { + new Toucan { + override def configNamespace = "toucan" + override def globalConfig = new Config(map) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @Test + def testDefault(): Unit = { + val pipeline = initPipeline(ToucanTest.config) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + + @Test + def testBinning(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("bin_size" -> 4000, "min_scatter_genome_size" -> 1000)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 4 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + + @Test + def testGonl(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("gonl_vcf" -> ToucanTest.gonlVcfFile)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1 + } + + @Test + def testExac(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("exac_vcf" -> ToucanTest.exacVcfFile)) + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 1 + } + + @Test + def testVarda(): Unit = { + val pipeline = initPipeline(ToucanTest.config ++ Map("use_varda" -> true)) + val gvcfFile = File.createTempFile("bla.", ".g.vcf") + pipeline.inputVcf = new File(ToucanTest.resourcePath("/chrQ2.vcf.gz")) + pipeline.inputGvcf = Some(gvcfFile) + pipeline.script() + + pipeline.functions.count(_.isInstanceOf[VariantEffectPredictor]) shouldBe 1 + pipeline.functions.count(_.isInstanceOf[VcfWithVcf]) shouldBe 0 + } + +} + +object ToucanTest { + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val outputDir = Files.createTempDir() + outputDir.deleteOnExit() + + val gonlVcfFile: File = File.createTempFile("gonl.", ".vcf.gz") + gonlVcfFile.deleteOnExit() + val exacVcfFile: File = File.createTempFile("exac.", ".vcf.gz") + exacVcfFile.deleteOnExit() + + val config = Map( + "reference_fasta" -> resourcePath("/fake_chrQ.fa"), + "output_dir" -> outputDir, + "gatk_jar" -> "test", + "varianteffectpredictor" -> Map( + "vep_script" -> "test", + "cache" -> true, + "dir" -> "test" + ), + "varda_root" -> "test", + "varda_token" -> "test", + "bcftools" -> Map("exe" -> "test"), + "bedtools" -> Map("exe" -> "test"), + "manwe" -> Map("exe" -> "test"), + "bgzip" -> Map("exe" -> "test") + ) +}