diff --git a/log4j.properties b/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 5088e44c2eca6f54b23104c7695c3a03d5f762c8..544af1b7c1e925f0bc21ccf7c75fa0669a32fe84 100644 --- a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -60,28 +60,34 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { } def addMultiSampleJobs(): Unit = { - val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference") - val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true) + val refVariants = addGenerateFasta(null, new File(outputDir, "reference"), outputName = "reference") + val refVariantSnps = addGenerateFasta(null, new File(outputDir, "reference"), outputName = "reference", snpsOnly = true) - val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta") + val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, + new File(outputDir, "fastas" + File.separator + "variant.fasta")) add(catVariants) - val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta") + val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, + new File(outputDir, "fastas" + File.separator + "variant.snps_only.fasta")) add(catVariantsSnps) - val catConsensus = Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta") + val catConsensus = 
Cat(this, refVariants.consensus :: samples.map(_._2.output.consensus).toList, + new File(outputDir, "fastas" + File.separator + "consensus.fasta")) add(catConsensus) - val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta") + val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samples.map(_._2.outputSnps.consensus).toList, + new File(outputDir, "fastas" + File.separator + "consensus.snps_only.fasta")) add(catConsensusSnps) - val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta") + val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samples.map(_._2.output.consensusVariants).toList, + new File(outputDir, "fastas" + File.separator + "consensus.variant.fasta")) add(catConsensusVariants) - val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta") + val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samples.map(_._2.outputSnps.consensusVariants).toList, + new File(outputDir, "fastas" + File.separator + "consensus.variant.snps_only.fasta")) add(catConsensusVariantsSnps) val seed: Int = config("seed", default = 12345) - def addTreeJobs(variants: File, concensusVariants: File, outputDir: String, outputName: String) { - val dirSufixRaxml = if (outputDir.endsWith(File.separator)) "raxml" else File.separator + "raxml" - val dirSufixGubbins = if (outputDir.endsWith(File.separator)) "gubbins" else File.separator + "gubbins" + def addTreeJobs(variants: File, concensusVariants: File, outputDir: File, outputName: String) { + val dirSufixRaxml = new File(outputDir, "raxml") + val dirSufixGubbins = new File(outputDir, "gubbins") val raxmlMl = new Raxml(this) raxmlMl.input = variants @@ -101,7 
+107,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") raxmlBoot.p = seed raxmlBoot.b = math.abs(r.nextInt) - raxmlBoot.w = outputDir + dirSufixRaxml + raxmlBoot.w = dirSufixRaxml raxmlBoot.N = 1 raxmlBoot.n = outputName + "_boot_" + t add(raxmlBoot) @@ -125,16 +131,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript { val gubbins = new RunGubbins(this) gubbins.fastafile = concensusVariants gubbins.startingTree = Some(raxmlBi.getBipartitionsFile) - gubbins.outputDirectory = outputDir + dirSufixGubbins + gubbins.outputDirectory = dirSufixGubbins add(gubbins) } - addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only") - addTreeJobs(catVariants.output, catConsensusVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels") + addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, + new File(outputDir, "trees" + File.separator + "snps_only"), "snps_only") + addTreeJobs(catVariants.output, catConsensusVariants.output, + new File(outputDir, "trees" + File.separator + "snps_indels"), "snps_indels") } - def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null, + def addGenerateFasta(sampleName: String, outputDir: File, outputName: String = null, snpsOnly: Boolean = false): FastaOutput = { val bastyGenerateFasta = new BastyGenerateFasta(this) bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName diff --git a/protected/basty/src/test/resources/log4j.properties b/protected/basty/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/basty/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# 
pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala index 8728b6c651824e340556222cb60ef52d7fd9ab0a..ccdce3411d3df623657b3cd2a14415cf67f7b134 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala @@ -11,8 +11,8 @@ import nl.lumc.sasc.biopet.core.config.Configurable class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral { scatterCount = config("scattercount", default = 0) - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph nt = Option(getThreads(3)) memoryLimit = Option(nt.getOrElse(1) 
* 2) diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala index c07a2a66c363fe9623ad35225e9f76eb45aa67a4..e6cc3aaf64790aea8198f60d57d75df4c47082f1 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala @@ -22,7 +22,7 @@ object BaseRecalibrator { val br = new BaseRecalibrator(root) br.input_file :+= input br.out = output - br.afterGraph + br.beforeGraph return br } } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index b8f4ea4efa3ea8cfed563c2b82651c0001b794f7..9521e9ea0f0d09b49690a9f6ea977f13a6eb3b8d 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -35,13 +35,13 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu stand_emit_conf = config("stand_emit_conf", default = 0) } - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph if (bamOutput != null && nct.getOrElse(1) > 1) { threads = 1 logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") } - nct = Some(threads) + nct = Some(getThreads(1)) memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1)) } diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala 
b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala index fb5eb4cc89a716bc30e4c90d4578326ed0b71d42..315934f8d4d09178c3200e1e1f904a7a865e59bb 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala @@ -13,7 +13,7 @@ class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.que } object IndelRealigner { - def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = { + def apply(root: Configurable, input: File, targetIntervals: File, outputDir: File): IndelRealigner = { val ir = new IndelRealigner(root) ir.input_file :+= input ir.targetIntervals = targetIntervals diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala index bfd53fd6ee6761c3b7697c5277bef0a9af244784..76998f41d0c696df6bc06dfb8ff322331bd2ad47 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala @@ -18,7 +18,7 @@ class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute. 
} object RealignerTargetCreator { - def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = { + def apply(root: Configurable, input: File, outputDir: File): RealignerTargetCreator = { val re = new RealignerTargetCreator(root) re.input_file :+= input re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals") diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index 26936df755a3d25aa41f78e0c2b568920b93e585..599ffa170519c17d7b3e35e14783ad75e208dadb 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -8,14 +8,14 @@ package nl.lumc.sasc.biopet.extensions.gatk import nl.lumc.sasc.biopet.core.config.Configurable class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") this.sample_ploidy = config("ploidy") - nct = config("threads", default = 1) + nct = Some(getThreads(1)) memoryLimit = Option(nct.getOrElse(1) * 2) if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") if (config.contains("output_mode")) { diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala 
b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala index c7956cab19dde275c322f3196eafe4ddce60800f..a9d252fde235f347c16ac3ea1d6b7e346c3214ff 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala @@ -9,8 +9,8 @@ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral { - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph } } @@ -21,7 +21,7 @@ object VariantEval { vareval.eval = Seq(sample) vareval.comp = Seq(compareWith) vareval.out = output - vareval.afterGraph + vareval.beforeGraph return vareval } @@ -35,7 +35,7 @@ object VariantEval { vareval.ST = ST vareval.noEV = true vareval.EV = EV - vareval.afterGraph + vareval.beforeGraph return vareval } diff --git a/protected/biopet-gatk-extensions/src/test/resources/log4j.properties b/protected/biopet-gatk-extensions/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/biopet-gatk-extensions/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. 
The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala index a054a8703778b8d71a9a778a56960b798f9c9e5e..342dabcdad82ebb78e28a41c360ef13a3568069d 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala @@ -29,11 +29,8 @@ class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with Biope var dbsnp: File = config("dbsnp") def init() { - if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) { + if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) gvcfFiles ::= file.toString - } - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" } def biopetScript() { @@ -53,7 +50,7 @@ class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with Biope val gatkGenotyping = new GatkGenotyping(this) gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool gatkGenotyping.samples :+= sampleName - gatkGenotyping.outputDir = outputDir + 
"samples_" + gvcfPool.size + "/" + gatkGenotyping.outputDir = new File(outputDir, "samples_" + gvcfPool.size) gatkGenotyping.init gatkGenotyping.biopetScript addAll(gatkGenotyping.functions) diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala index 0143d1bb63417956ec2a6d3d079a17b0b2f63d5e..7fcebd54afb8e18b3cf5236e4210001c9882b689 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala @@ -33,16 +33,15 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript var samples: List[String] = Nil def init() { - if (outputFile == null) outputFile = outputDir + outputName + ".vcf.gz" - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" + require(outputName != null, "Outputname is null") + if (outputFile == null) outputFile = new File(outputDir, outputName + ".vcf.gz") } def biopetScript() { addGenotypeGVCFs(inputGvcfs, outputFile) if (!samples.isEmpty) { - if (samples.size > 1) addSelectVariants(outputFile, samples, outputDir + "samples/", "all") - for (sample <- samples) addSelectVariants(outputFile, List(sample), outputDir + "samples/", sample) + if (samples.size > 1) addSelectVariants(outputFile, samples, new File(outputDir, "samples/"), "all") + for (sample <- samples) addSelectVariants(outputFile, List(sample), new File(outputDir, "samples/"), sample) } } @@ -52,8 +51,8 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript return genotypeGVCFs.out } - def addSelectVariants(inputFile: File, samples: List[String], outputDir: String, name: String) { - val selectVariants = 
SelectVariants(this, inputFile, outputDir + name + ".vcf.gz") + def addSelectVariants(inputFile: File, samples: List[String], outputDir: File, name: String) { + val selectVariants = SelectVariants(this, inputFile, new File(outputDir, name + ".vcf.gz")) selectVariants.excludeNonVariants = true for (sample <- samples) selectVariants.sample_name :+= sample add(selectVariants) diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index 5eb657a517fe566a99728d25527ff042a232693c..b65eb6abaec460f3633d2da148bafb39bc0d49eb 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -47,13 +47,13 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri val mapping = new Mapping(qscript) mapping.sampleId = sampleId mapping.libId = libId - mapping.outputDir = libDir + "/variantcalling/" + mapping.outputDir = libDir /** Library variantcalling */ val gatkVariantcalling = new GatkVariantcalling(qscript) gatkVariantcalling.doublePreProces = false gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = libDir + gatkVariantcalling.outputDir = new File(libDir, "variantcalling") protected def addJobs(): Unit = { val bamFile: Option[File] = if (config.contains("R1")) { @@ -124,7 +124,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri /** sample variantcalling */ val gatkVariantcalling = new GatkVariantcalling(qscript) gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = sampleDir + "/variantcalling/" + gatkVariantcalling.outputDir = new File(sampleDir, "variantcalling/") protected def addJobs(): Unit = { addPerLibJobs() @@ -143,8 +143,6 @@ class GatkPipeline(val root: 
Configurable) extends QScript with MultiSampleQScri } def init() { - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" } val multisampleVariantcalling = new GatkVariantcalling(this) { @@ -158,7 +156,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri def addMultiSampleJobs(): Unit = { val gvcfFiles: List[File] = if (mergeGvcfs && externalGvcfs.size + samples.size > 1) { - val newFile = outputDir + "merged.gvcf.vcf.gz" + val newFile = new File(outputDir, "merged.gvcf.vcf.gz") add(CombineGVCFs(this, externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile), newFile)) List(newFile) } else externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile) @@ -167,7 +165,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri if (jointGenotyping) { val gatkGenotyping = new GatkGenotyping(this) gatkGenotyping.inputGvcfs = gvcfFiles - gatkGenotyping.outputDir = outputDir + "genotyping/" + gatkGenotyping.outputDir = new File(outputDir, "genotyping") gatkGenotyping.init gatkGenotyping.biopetScript addAll(gatkGenotyping.functions) @@ -185,7 +183,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri } if (gatkVariantcalling.useMpileup) { - val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz") + val cvRaw = CombineVariants(this, allRawVcfFiles.toList, new File(outputDir, "variantcalling/multisample.raw.vcf.gz")) add(cvRaw) gatkVariantcalling.rawVcfInput = cvRaw.out } @@ -193,7 +191,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri multisampleVariantcalling.preProcesBams = false multisampleVariantcalling.doublePreProces = false multisampleVariantcalling.inputBams = allBamfiles.toList - multisampleVariantcalling.outputDir = outputDir + "variantcalling" + 
multisampleVariantcalling.outputDir = new File(outputDir, "variantcalling") multisampleVariantcalling.outputName = "multisample" multisampleVariantcalling.init multisampleVariantcalling.biopetScript @@ -203,7 +201,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri val recalibration = new GatkVariantRecalibration(this) recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile recalibration.bamFiles = allBamfiles - recalibration.outputDir = outputDir + "recalibration/" + recalibration.outputDir = new File(outputDir, "recalibration") recalibration.init recalibration.biopetScript } diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala index 409d83c7168891a4df5f100c711e8a0a1d0fc2b2..0e057f4a2cdf5131775d453edc816f00f37c3302 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala @@ -26,9 +26,7 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop var outputVcf: File = _ def init() { - if (inputVcf == null) throw new IllegalStateException("Missing Output directory on gatk module") - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" + require(inputVcf != null, "Missing Output directory on gatk module") } def biopetScript() { @@ -37,7 +35,7 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir) } - def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = { + def addSnpVariantRecalibrator(inputVcf: File, dir: File): File = { val 
snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) if (!snpRecal.resource.isEmpty) { @@ -54,7 +52,7 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop } } - def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = { + def addIndelVariantRecalibrator(inputVcf: File, dir: File): File = { val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true) if (!indelRecal.resource.isEmpty) { @@ -71,7 +69,7 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop } } - def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = { + def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: File): File = { val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) add(variantAnnotator) return variantAnnotator.out diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index 5d2d1de57127fd85f5afa21eb92becd8a7454b2d..8e89a84fd7ab7f042a574cf61259638e2a647400 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -52,8 +52,6 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr def init() { if (outputName == null && sampleID != null) outputName = sampleID else if (outputName == null) outputName = config("output_name", default = "noname") - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - 
else if (!outputDir.endsWith("/")) outputDir += "/" val baseRecalibrator = new BaseRecalibrator(this) if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) { @@ -66,7 +64,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr if (files.isEmpty) throw new IllegalStateException("Files can't be empty") else if (!doublePreProces) files else if (files.size == 1) { - val bamFile: File = outputDir + files.head.getName + val bamFile = new File(outputDir, files.head.getName) if (bamFile != files.head) { val oldIndex: File = files.head.getAbsolutePath.stripSuffix(".bam") + ".bai" val newIndex: File = bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai" @@ -113,20 +111,20 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr val hcGvcf = new HaplotypeCaller(this) hcGvcf.useGvcf hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = outputDir + outputName + ".hc.discovery.gvcf.vcf.gz" + hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.gvcf.vcf.gz") add(hcGvcf) scriptOutput.gvcfFile = hcGvcf.out } if (useHaplotypecaller) { if (sampleID != null) { - val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz") + val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), new File(outputDir, outputName + ".hc.discovery.vcf.gz")) add(genotypeGVCFs) scriptOutput.hcVcfFile = genotypeGVCFs.out } else { val hcGvcf = new HaplotypeCaller(this) hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = outputDir + outputName + ".hc.discovery.vcf.gz" + hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.vcf.gz") add(hcGvcf) scriptOutput.hcVcfFile = hcGvcf.out } @@ -136,7 +134,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr if (useUnifiedGenotyper) { val ugVcf = new UnifiedGenotyper(this) ugVcf.input_file = scriptOutput.bamFiles - ugVcf.out = outputDir + outputName + 
".ug.discovery.vcf.gz" + ugVcf.out = new File(outputDir, outputName + ".ug.discovery.vcf.gz") add(ugVcf) scriptOutput.ugVcfFile = ugVcf.out mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile) @@ -148,7 +146,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr val m2v = new MpileupToVcf(this) m2v.inputBam = scriptOutput.bamFiles.head m2v.sample = sampleID - m2v.output = outputDir + outputName + ".raw.vcf" + m2v.output = new File(outputDir, outputName + ".raw.vcf") add(m2v) scriptOutput.rawVcfFile = m2v.output @@ -160,7 +158,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr ), super.defaults) } vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") + vcfFilter.outputVcf = swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") add(vcfFilter) scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput @@ -176,7 +174,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr if (useHaplotypecaller) { val hcAlleles = new HaplotypeCaller(this) hcAlleles.input_file = scriptOutput.bamFiles - hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz" + hcAlleles.out = new File(outputDir, outputName + ".hc.allele.vcf.gz") hcAlleles.alleles = mergeAlleles.output hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES add(hcAlleles) @@ -187,7 +185,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr if (useUnifiedGenotyper) { val ugAlleles = new UnifiedGenotyper(this) ugAlleles.input_file = scriptOutput.bamFiles - ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz" + ugAlleles.out = new File(outputDir, outputName + ".ug.allele.vcf.gz") ugAlleles.alleles = mergeAlleles.output ugAlleles.genotyping_mode = 
org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES add(ugAlleles) @@ -206,32 +204,32 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr sv.out } - val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz") + val cvFinal = CombineVariants(this, mergeList.toList, new File(outputDir, outputName + ".final.vcf.gz")) cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED add(cvFinal) val vcfStats = new VcfStats(this) vcfStats.input = cvFinal.out - vcfStats.setOutputDir(outputDir + File.separator + "vcfstats") + vcfStats.setOutputDir(new File(outputDir, "vcfstats")) add(vcfStats) scriptOutput.finalVcfFile = cvFinal.out } } - def addIndelRealign(inputBam: File, dir: String, isIntermediate: Boolean = true): File = { + def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean = true): File = { val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) realignerTargetCreator.isIntermediate = true add(realignerTargetCreator) - val indelRealigner = IndelRealigner.apply(this, inputBam, realignerTargetCreator.out, dir) + val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir) indelRealigner.isIntermediate = isIntermediate add(indelRealigner) return indelRealigner.o } - def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = { + def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean = false): File = { val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) if (baseRecalibrator.knownSites.isEmpty) { diff --git a/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties b/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties new file mode 100644 index 
0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/biopet-protected-package/src/test/resources/log4j.properties b/protected/biopet-protected-package/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/biopet-protected-package/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. 
+# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/log4j.properties b/protected/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/protected/src/src/test/resources/log4j.properties b/protected/src/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/protected/src/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala index e76acd863a3eed3ebdcb1a4b1b5654fe71d623b9..8c1f543bf551d7b828bf17ea4afda47c1bf036dd 100644 --- a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala +++ b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala @@ -46,7 +46,7 @@ class Bam2Wig(val root: Configurable) extends QScript with BiopetQScript { object Bam2Wig extends PipelineCommand { def apply(root: Configurable, bamFile: File): Bam2Wig = { val bamToBigWig = new Bam2Wig(root) - bamToBigWig.outputDir = bamFile.getParent + bamToBigWig.outputDir = bamFile.getParentFile bamToBigWig.bamFile = bamFile bamToBigWig.init() bamToBigWig.biopetScript() diff --git a/public/bam2wig/src/test/resources/log4j.properties b/public/bam2wig/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/bam2wig/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. 
The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index a23486320609a0fa1563c3d5b21358cf70eb5d80..f16db9b0e39edbbc243ac7c5ce54bbd83defd857 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -41,8 +41,6 @@ class BamMetrics(val root: Configurable) extends QScript with BiopetQScript { var wholeGenome = false def init() { - if (outputDir == null) throw new IllegalStateException("Missing Output directory on BamMetrics module") - else if (!outputDir.endsWith("/")) outputDir += "/" if (config.contains("target_bed")) { for (file <- config("target_bed").asList) { bedFiles +:= new File(file.toString) @@ -63,7 +61,7 @@ class BamMetrics(val root: Configurable) extends QScript with BiopetQScript { add(BedToInterval(this, baitBedFile, inputBam, outputDir), true) for (bedFile <- bedFiles) { - val targetDir = outputDir + bedFile.getName.stripSuffix(".bed") + "/" + val targetDir = new File(outputDir, bedFile.getName.stripSuffix(".bed")) val targetInterval = BedToInterval(this, bedFile, inputBam, targetDir) add(targetInterval, true) add(CalculateHsMetrics(this, 
inputBam, if (baitIntervalFile != null) baitIntervalFile @@ -87,7 +85,7 @@ class BamMetrics(val root: Configurable) extends QScript with BiopetQScript { } object BamMetrics extends PipelineCommand { - def apply(root: Configurable, bamFile: File, outputDir: String): BamMetrics = { + def apply(root: Configurable, bamFile: File, outputDir: File): BamMetrics = { val bamMetrics = new BamMetrics(root) bamMetrics.inputBam = bamFile bamMetrics.outputDir = outputDir diff --git a/public/bammetrics/src/test/resources/log4j.properties b/public/bammetrics/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/bammetrics/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to DEBUG and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. 
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala index 4ec1a0af9f58c9cce28aa321c3c042b8c87dd194..b73f76c7bb72ba4a7022c3eba1381e3f85324fda 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -15,8 +15,21 @@ */ package nl.lumc.sasc.biopet.core +/** + * This class is for commandline programs where the executable is a non JVM based program + */ abstract class BiopetCommandLineFunction extends BiopetCommandLineFunctionTrait { + /** + * This function needs to be implemented to define the command that is executed + * @return Command to run + */ protected def cmdLine: String + + /** + * implementing a final version of the commandLine from org.broadinstitute.gatk.queue.function.CommandLineFunction + * User needs to implement cmdLine instead + * @return Command to run + */ final def commandLine: String = { preCmdInternal val cmd = cmdLine diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 3ed9ba5bf58e0bc4a62d7c4f9783e4fb924a2838..944dee43fa5fae667cad5e08a05ae4ce9d5f8fbe 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -25,34 +25,40 @@ import scala.util.matching.Regex import java.io.FileInputStream import java.security.MessageDigest +/** + * Biopet command line trait to 
auto check executable and cluster values + */ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable { analysisName = configName @Input(doc = "deps", required = false) var deps: List[File] = Nil - @Argument(doc = "Threads", required = false) var threads = 0 val defaultThreads = 1 - @Argument(doc = "Vmem", required = false) var vmem: Option[String] = None val defaultVmem: String = "" - - @Argument(doc = "Executable", required = false) var executable: String = _ - protected[core] def beforeCmd { - } + /** + * Can override this method. This is executed just before the job is ready to run. + * Can check on run time files from pipeline here + */ + protected[core] def beforeCmd {} - protected[core] def afterGraph { - } + /** + * Can override this method. This is executed after the script is done and Queue starts to generate the graph + */ + protected[core] def beforeGraph {} + /** + * Set default output file, threads and vmem for current job + */ override def freezeFieldValues() { - checkExecutable - afterGraph - - if (jobOutputFile == null) jobOutputFile = new File(firstOutput.getAbsoluteFile.getParent + "/." + firstOutput.getName + "." + configName + ".out") + preProcesExecutable + beforeGraph + if (jobOutputFile == null) jobOutputFile = new File(firstOutput.getAbsoluteFile.getParent, "." + firstOutput.getName + "." + configName + ".out") if (threads == 0) threads = getThreads(defaultThreads) if (threads > 1) nCoresRequest = Option(threads) @@ -67,7 +73,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab super.freezeFieldValues() } - protected[core] def checkExecutable { + /** + * Checks executable.
Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report + */ + protected[core] def preProcesExecutable { if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) { try if (executable != null) { if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) { @@ -105,18 +114,34 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab addJobReportBinding("md5sum_exe", md5.getOrElse("None")) } + /** + * executes preProcesExecutable method and fills the job report + */ final protected def preCmdInternal { - checkExecutable + preProcesExecutable beforeCmd - addJobReportBinding("cores", if (nCoresRequest.get.toInt > 0) nCoresRequest.get.toInt else 1) + addJobReportBinding("cores", nCoresRequest match { + case Some(n) if n > 0 => n + case _ => 1 + }) addJobReportBinding("version", getVersion) } + /** + * Command to get version of executable + * @return + */ protected def versionCommand: String = null + + /** Regex to get version from version command output */ protected val versionRegex: Regex = null - protected val versionExitcode = List(0) // Can select multiple + + /** Allowed exit codes for the version command */ + protected val versionExitcode = List(0) + + /** Executes the version command */ private def getVersionInternal: String = { if (versionCommand == null || versionRegex == null) return "N/A" val exe = new File(versionCommand.trim.split(" ")(0)) @@ -141,12 +166,20 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab return "N/A" } + /** Get version from cache otherwise execute the version command */ def getVersion: String = { + if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) + preProcesExecutable if (!BiopetCommandLineFunctionTrait.versionCache.contains(executable)) BiopetCommandLineFunctionTrait.versionCache += executable -> getVersionInternal return BiopetCommandLineFunctionTrait.versionCache(executable) } + /** + *
Get threads from config + * @param default default when not found in config + * @return number of threads + */ def getThreads(default: Int): Int = { val maxThreads: Int = config("maxthreads", default = 8) val threads: Int = config("threads", default = default) @@ -154,6 +187,12 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab else return maxThreads } + /** + * Get threads from config + * @param default default when not found in config + * @param module Module when this is different from default + * @return number of threads + */ def getThreads(default: Int, module: String): Int = { val maxThreads: Int = config("maxthreads", default = 8, submodule = module) val threads: Int = config("threads", default = default, submodule = module) @@ -162,9 +201,12 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab } } +/** + * stores global caches + */ object BiopetCommandLineFunctionTrait { import scala.collection.mutable.Map private val versionCache: Map[String, String] = Map() private val executableMd5Cache: Map[String, String] = Map() private val executableCache: Map[String, String] = Map() -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala index ed3a1d99c1ccd33d8d5b128aaada5144b1a369e2..b0657e369afa46d75ccbe2b387f42944a490ae92 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala @@ -17,16 +17,28 @@ package nl.lumc.sasc.biopet.core import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction +/** + * Biopet commandline class for java based programs + */ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with
BiopetCommandLineFunctionTrait { - executable = "java" + executable = config("java", default = "java", submodule = "java", freeVar = false) javaGCThreads = config("java_gc_threads") javaGCHeapFreeLimit = config("java_gc_heap_freelimit") javaGCTimeLimit = config("java_gc_timelimit") - override def javaOpts = super.javaOpts + optional("-Dscala.concurrent.context.numThreads=", threads, spaceSeparated = false, escape = false) + /** + * Constructs java opts, this adds scala threads + * @return + */ + override def javaOpts = super.javaOpts + + optional("-Dscala.concurrent.context.numThreads=", threads, spaceSeparated = false, escape = false) - override def afterGraph { + /** + * Sets memory limit + */ + override def beforeGraph { + super.beforeGraph memoryLimit = config("memory_limit") } @@ -38,7 +50,6 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC preCmdInternal val cmd = super.commandLine val finalCmd = executable + cmd.substring(cmd.indexOf(" ")) - // addJobReportBinding("command", cmd) return cmd } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 062d725bafb7050258aaf07e0a0a0444e02aafdc..91a9da2a3b79171aba7be7144b8df2fa35675e01 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -23,20 +23,20 @@ import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.function.QFunction import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } - import scala.collection.mutable.ListBuffer +/** + * Base for biopet pipeline + */ trait BiopetQScript extends Configurable with GatkLogging { @Argument(doc = "JSON config file(s)", fullName = "config_file", 
shortName = "config", required = false) val configfiles: List[File] = Nil - var outputDir: String = { - val temp = Config.getValueFromMap(globalConfig.map, ConfigValueIndex(this.configName, configPath, "output_dir")) - if (temp.isEmpty) "" - else { - val t = temp.get.value.toString - if (!t.endsWith("/")) t + "/" else t + var outputDir: File = { + Config.getValueFromMap(globalConfig.map, ConfigValueIndex(this.configName, configPath, "output_dir")) match { + case Some(value) => new File(value.asString).getAbsoluteFile + case _ => new File(".") } } @@ -45,17 +45,23 @@ trait BiopetQScript extends Configurable with GatkLogging { var outputFiles: Map[String, File] = Map() + /** Get implemented from org.broadinstitute.gatk.queue.QScript */ var qSettings: QSettings + /** Get implemented from org.broadinstitute.gatk.queue.QScript */ + var functions: Seq[QFunction] + + /** Init for pipeline */ def init - def biopetScript - var functions: Seq[QFunction] + /** Pipeline itself */ + def biopetScript + /** + * Script from queue itself, final to force some checks for each pipeline and write report + */ final def script() { - outputDir = config("output_dir") - if (outputDir.isEmpty) outputDir = new File(".").getAbsolutePath() - else if (!outputDir.endsWith("/")) outputDir += "/" + outputDir = config("output_dir").asFile.getAbsoluteFile init biopetScript @@ -65,19 +71,28 @@ trait BiopetQScript extends Configurable with GatkLogging { } for (function <- functions) function match { case f: BiopetCommandLineFunctionTrait => { - f.checkExecutable - f.afterGraph + f.preProcesExecutable + f.beforeGraph + f.commandLine } case _ => } - if (new File(outputDir).canWrite) globalConfig.writeReport(qSettings.runName, outputDir + ".log/" + qSettings.runName) - else BiopetQScript.addError("Output dir: '" + outputDir + "' is not writeable") + if (outputDir.getParentFile.canWrite || (outputDir.exists && outputDir.canWrite)) + globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + 
qSettings.runName)) + else BiopetQScript.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created") BiopetQScript.checkErrors } - def add(functions: QFunction*) // Gets implemeted at org.broadinstitute.sting.queue.QScript + /** Get implemented from org.broadinstitute.gatk.queue.QScript */ + def add(functions: QFunction*) + + /** + * Function to set isIntermediate and add in 1 line + * @param function + * @param isIntermediate + */ def add(function: QFunction, isIntermediate: Boolean = false) { function.isIntermediate = isIntermediate add(function) @@ -107,4 +122,4 @@ object BiopetQScript extends Logging { throw new IllegalStateException("Biopet found errors") } } -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala index 6fca2dd5d88abae55fc8ecbd0bbc105dbfda6d09..4566cd4f1b26f08f2af84e31735b6514dfd10d6a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala @@ -17,10 +17,20 @@ package nl.lumc.sasc.biopet.core import org.apache.log4j.Logger +/** + * Trait to implement logger function on local class/object + */ trait Logging { + /** + * + * @return Global biopet logger + */ def logger = Logging.logger } +/** + * Logger object, has a global logger + */ object Logging { val logger = Logger.getRootLogger } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MainCommand.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MainCommand.scala index 369e5267dbc0a162f44b184cc9ee23595122e6ed..cdd226c89f6f2363a452e98116d60b691c1033c1 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MainCommand.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MainCommand.scala @@ -15,6 +15,9 @@ */ package nl.lumc.sasc.biopet.core +/** + * This trait is used in the biopet executable + */ trait MainCommand { lazy val commandName = this.getClass.getSimpleName diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 4184f29afacc7f6afcf6dc9d0eb45f9eddfc1993..c93cfa7a4c550ea34842e0f1d812eaac332f21d8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -59,7 +59,7 @@ trait MultiSampleQScript extends BiopetQScript { def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix) /** Returns library directory */ - def libDir = sampleDir + "lib_" + libId + File.separator + def libDir = new File(sampleDir, "lib_" + libId) /** Function that add library jobs */ protected def addJobs() @@ -108,7 +108,7 @@ trait MultiSampleQScript extends BiopetQScript { def createFile(suffix: String) = new File(sampleDir, sampleId + suffix) /** Returns sample directory */ - def sampleDir = outputDir + "samples" + File.separator + sampleId + File.separator + def sampleDir = new File(outputDir, "samples" + File.separator + sampleId) } /** Sample type, need implementation in pipeline */ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala index 60f7112525a92597c8e88c480e87c43bb228105a..8a6d6657ab820aed7fcdff294b8c0f4c768cacb7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala @@ -20,10 +20,21 @@ import 
java.io.File import nl.lumc.sasc.biopet.core.config.Config import nl.lumc.sasc.biopet.core.workaround.BiopetQCommandLine +/** + * Wrapper around executable from Queue + */ trait PipelineCommand extends MainCommand with GatkLogging { + /** + * Gets location of compiled class of pipeline + * @return path from classPath to class file + */ def pipeline = "/" + getClass.getName.stripSuffix("$").replaceAll("\\.", "/") + ".class" + /** + * Class can be used directly from java with -cp option + * @param args + */ def main(args: Array[String]): Unit = { val argsSize = args.size for (t <- 0 until argsSize) { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala index b3b25e9a764480f90badc5965fe36f50f22da1fc..53980eadb9e05ef93cc842d822ee75f01c52f67a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala @@ -15,10 +15,16 @@ */ package nl.lumc.sasc.biopet.core +/** + * Trait for biopet tools, sets some default args + */ trait ToolCommand extends MainCommand with Logging { - protected abstract class AbstractArgs { - } + /** Placeholder for args */ + protected abstract class AbstractArgs {} + /** + * Abstract opt parser to add default args to each biopet tool + */ protected abstract class AbstractOptParser extends scopt.OptionParser[Args](commandName) { opt[String]('l', "log_level") foreach { x => x.toLowerCase match { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala index e25e8ef508188adbd5f767b20ca1f688c4eed922..d2fe78ec224c921896e51e04c2f51ad0435e201b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala +++
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala @@ -43,14 +43,17 @@ class Config(var map: Map[String, Any]) extends Logging { * @param valueName Name of value */ def loadConfigEnv(valueName: String) { - val globalFiles = sys.env.get(valueName).getOrElse("").split(":") - if (globalFiles.isEmpty) logger.info(valueName + " value not found, no global config is loaded") - for (globalFile <- globalFiles) { - val file: File = new File(globalFile) - if (file.exists()) { - logger.info("Loading config file: " + file) - loadConfigFile(file) - } else logger.warn(valueName + " value found but file does not exist, no global config is loaded") + sys.env.get(valueName) match { + case Some(globalFiles) => { + for (globalFile <- globalFiles.split(":")) { + val file: File = new File(globalFile) + if (file.exists) { + logger.info("Loading config file: " + file) + loadConfigFile(file) + } else logger.warn(valueName + " value found but file '" + file + "' does not exist, no global config is loaded") + } + } + case _ => logger.info(valueName + " value not found, no global config is loaded") } } @@ -140,7 +143,8 @@ class Config(var map: Map[String, Any]) extends Logging { } else ConfigValue(requestedIndex, null, null, freeVar) } - def writeReport(id: String, directory: String): Unit = { + def writeReport(id: String, directory: File): Unit = { + directory.mkdirs() def convertIndexValuesToMap(input: List[(ConfigValueIndex, Any)], forceFreeVar: Option[Boolean] = None): Map[String, Any] = { input.foldLeft(Map[String, Any]())( @@ -155,8 +159,7 @@ class Config(var map: Map[String, Any]) extends Logging { } def writeMapToJsonFile(map: Map[String, Any], name: String): Unit = { - val file = new File(directory + "/" + id + "." + name + ".json") - file.getParentFile.mkdirs() + val file = new File(directory, id + "." 
+ name + ".json") val writer = new PrintWriter(file) writer.write(ConfigUtils.mapToJson(map).spaces2) writer.close() diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala index c15cd872f8a60fdd39762738a634cc745d2eb50d..bc3b5fc1394a3ae09169a138c39025788552858d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValue.scala @@ -25,6 +25,12 @@ class ConfigValue(val requestIndex: ConfigValueIndex, val foundIndex: ConfigValu */ def asString = any2string(value) + /** + * Get value as File + * @return value as File + */ + def asFile = new File(any2string(value)) + /** * Get value as Int * @return value as Int diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala index 51997ccb8a12fb5c3a234e5e7d95c001dd704b4f..df01a54371ee836acdfc93ac41a87d15d7fbce28 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala @@ -17,11 +17,12 @@ package nl.lumc.sasc.biopet.core.config import nl.lumc.sasc.biopet.core.Logging import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions +import scala.collection.JavaConversions._ trait Configurable extends ImplicitConversions { /** Should be object of parant object */ val root: Configurable - val globalConfig: Config = if (root != null) root.globalConfig else Config.global + def globalConfig: Config = if (root != null) root.globalConfig else Config.global /** subfix to the path */ def subPath: List[String] = Nil diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala index a2ca78b4c52e0a0faa5a4a67ebd7998096a68cf9..cbd95512f2bee28b6a85eee03202c5e83ad3ef4e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala @@ -39,14 +39,16 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* + * This is a modified version of org.broadinstitute.gatk.queue.QCommandLine; it works without compiling Scala files and instead uses built-in class files to skip the compile step + */ + package nl.lumc.sasc.biopet.core.workaround import java.io.File import org.broadinstitute.gatk.utils.commandline._ import org.broadinstitute.gatk.queue.util._ -import org.broadinstitute.gatk.queue.QCommandPlugin -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.QScriptManager +import org.broadinstitute.gatk.queue.{ QCommandPlugin, QScript, QScriptManager } import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } import org.broadinstitute.gatk.queue.engine.{ QStatusMessenger, QGraphSettings, QGraph } import collection.JavaConversions._ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala index 7bda8792544eea2d863ab06192b51b55ae900077..05867d694490ebe64f6d48588f72d4288227e904 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala @@ -48,8 +48,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable + " --version" override val defaultThreads = 4 - override def 
afterGraph { - this.checkExecutable + override def beforeGraph { + this.preProcesExecutable val fastqcDir = new File(executable).getParent diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala index 091d2b6c39136bd9fcb2c9c36e82f9f4c0301644..a266719a58af2345badfd5501261cb4473e9fa5f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Md5sum.scala @@ -43,10 +43,10 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction { } object Md5sum { - def apply(root: Configurable, fastqfile: File, outDir: String): Md5sum = { + def apply(root: Configurable, fastqfile: File, outDir: File): Md5sum = { val md5sum = new Md5sum(root) md5sum.input = fastqfile - md5sum.output = new File(outDir + fastqfile.getName + ".md5") + md5sum.output = new File(outDir, fastqfile.getName + ".md5") return md5sum } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala index 1d7b45ec9617091a457f073e4110ff5e01932e24..9913254953607dd1542b70ebbf49d2a2dacdd429 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala @@ -64,10 +64,10 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction { var executableNonThreads: String = config("exe", default = "raxmlHPC") var executableThreads: Option[String] = config("exe_pthreads") - override def afterGraph { + override def beforeGraph { if (threads == 0) threads = getThreads(defaultThreads) executable = if (threads > 1 && executableThreads.isDefined) executableThreads.get else executableNonThreads - super.afterGraph + super.beforeGraph out +:= 
getInfoFile f match { case "d" if b.isEmpty => { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala index e732c1023de5f713f4c21ed366da68eddc825285..a041f5dc760543bc0377edc5678299c81ec042c2 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala @@ -47,8 +47,8 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction { var verbose: Boolean = config("verbose", default = false) var noCleanup: Boolean = config("no_cleanup", default = false) - override def afterGraph: Unit = { - super.afterGraph + override def beforeGraph: Unit = { + super.beforeGraph jobLocalDir = new File(outputDirectory) if (prefix.isEmpty) prefix = Some(fastafile.getName) val out: List[String] = List(".recombination_predictions.embl", diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala index ef5450414b46fcbf793bae9691eaa5f38bb994cd..d30d19fdf36bb1e433b58af626dfd7ad088fd7ec 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala @@ -51,13 +51,6 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction { } object Seqstat { - def apply(root: Configurable, input: File, output: File): Seqstat = { - val seqstat = new Seqstat(root) - seqstat.input = input - seqstat.output = output - return seqstat - } - def apply(root: Configurable, fastqfile: File, outDir: String): Seqstat = { val seqstat = new Seqstat(root) val ext = fastqfile.getName.substring(fastqfile.getName.lastIndexOf(".")) diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala index 5056c70abc0e122fac0b95862f3443faaa48b5c7..abc517962b8302456b333c1f6b6aafe7fdd35a9b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala @@ -53,7 +53,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction { override val versionRegex = """sickle version (.*)""".r override def versionCommand = executable + " --version" - override def afterGraph { + override def beforeGraph { if (qualityType.isEmpty) qualityType = Some(defaultQualityType) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala index 1db8f6d67c8d892e6188aa750efb40b814fbbf1e..bb412a8fbc23a41bf64b905e349b4cc0787c6145 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Stampy.scala @@ -54,9 +54,9 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { var sensitive: Boolean = config("sensitive", default = false) var fast: Boolean = config("fast", default = false) - var readgroup: String = config("readgroup") + var readgroup: String = null var verbosity: Option[Int] = config("verbosity", default = 2) - var logfile: String = config("logfile") + var logfile: Option[String] = config("logfile") executable = config("exe", default = "stampy.py", freeVar = false) override val versionRegex = """stampy v(.*) \(.*\), .*""".r @@ -68,6 +68,11 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable + " --help" + override def beforeGraph: Unit = { + super.beforeGraph + 
require(readgroup != null) + } + def cmdLine: String = { var cmd: String = required(executable) + optional("-t", nCoresRequest) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala index 25fa1876b2abef8a1a6c34555e74d699607f9ba6..9404a895006398d7cd3cca35b3259846af1d6aa0 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala @@ -52,9 +52,9 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", "STAR") @Argument(doc = "Output Directory") - var outputDir: String = _ + var outputDir: File = _ - var genomeDir: String = config("genomeDir", reference.getParent + "/star/") + var genomeDir: File = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star")) var runmode: String = _ var sjdbOverhang: Int = _ var outFileNamePrefix: String = _ @@ -62,9 +62,8 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { override val defaultVmem = "6G" override val defaultThreads = 8 - override def afterGraph() { + override def beforeGraph() { if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "." 
- if (!outputDir.endsWith("/")) outputDir += "/" val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir if (runmode == null) { outputSam = new File(prefix + "Aligned.out.sam") @@ -97,36 +96,35 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { } object Star { - def apply(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = { + def apply(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = { val star = new Star(configurable) star.R1 = R1 if (R2 != null) star.R2 = R2 star.outputDir = outputDir star.isIntermediate = isIntermediate star.deps = deps - star.afterGraph + star.beforeGraph return star } - def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps: List[File] = Nil): (File, List[Star]) = { - val outDir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, outDir + "aln-pass1/") + def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): (File, List[Star]) = { + val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, new File(outputDir, "aln-pass1")) starCommand_pass1.isIntermediate = isIntermediate starCommand_pass1.deps = deps - starCommand_pass1.afterGraph + starCommand_pass1.beforeGraph val starCommand_reindex = new Star(configurable) starCommand_reindex.sjdbFileChrStartEnd = starCommand_pass1.outputTab - starCommand_reindex.outputDir = outDir + "re-index/" + starCommand_reindex.outputDir = new File(outputDir, "re-index") starCommand_reindex.runmode = "genomeGenerate" starCommand_reindex.isIntermediate = isIntermediate - starCommand_reindex.afterGraph + starCommand_reindex.beforeGraph - val starCommand_pass2 = 
Star(configurable, R1, if (R2 != null) R2 else null, outDir + "aln-pass2/") + val starCommand_pass2 = Star(configurable, R1, if (R2 != null) R2 else null, new File(outputDir, "aln-pass2")) starCommand_pass2.genomeDir = starCommand_reindex.outputDir starCommand_pass2.isIntermediate = isIntermediate starCommand_pass2.deps = deps - starCommand_pass2.afterGraph + starCommand_pass2.beforeGraph return (starCommand_pass2.outputSam, List(starCommand_pass1, starCommand_reindex, starCommand_pass2)) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala index 98ee6de521d499710e186bb2c196d9d471c6aa5d..ce86a1ed9bd1e8d1afa04867887be2f21cf6231b 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/TopHat.scala @@ -64,7 +64,7 @@ class TopHat(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable + " --version" - override def afterGraph() { + override def beforeGraph() { if (!outputDir.endsWith("/")) outputDir += "/" output = new File(outputDir + "accepted_hits.bam") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala index 02c0f09584206770965e67e6afc32765f9997a3f..fc1e0a8628e3c03c3b88a06c032317bcc43eedd1 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/ConiferExport.scala @@ -28,8 +28,8 @@ class ConiferExport(val root: Configurable) extends Conifer { @Output(doc = "Output <sample>.svdzrpkm.bed", shortName = "out", required = true) var output: File = _ - override def afterGraph { - 
this.checkExecutable + override def beforeGraph { + this.preProcesExecutable } override def cmdLine = super.cmdLine + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala index dc6d21e6d4da53d42513ca8c7ceb9fd67fdc8570..b62c852f9f40578131d734d206531e77236fb595 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/igvtools/IGVToolsCount.scala @@ -41,8 +41,8 @@ class IGVToolsCount(val root: Configurable) extends IGVTools { var pairs: Boolean = config("pairs", default = false) - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph wig.foreach(x => if (!x.getAbsolutePath.endsWith(".wig")) throw new IllegalArgumentException("WIG file should have a .wig file-extension")) @@ -100,4 +100,4 @@ object IGVToolsCount { counting.genomeChromSizes = genomeChromSizes return counting } -} \ No newline at end of file +} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala index e6a9c48e925949bdd413a9aca79afbb7e28b91d7..8224fd4713f00d47511156459b02f5b733e1deda 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/macs2/Macs2CallPeak.scala @@ -55,7 +55,7 @@ class Macs2CallPeak(val root: Configurable) extends Macs2 { var broadcutoff: Option[Int] = config("broadcutoff") var callsummits: Boolean = config("callsummits", default = false) - override def afterGraph: Unit = { + override def beforeGraph: Unit = { if (name.isEmpty) throw new IllegalArgumentException("Name is not 
defined") if (outputdir == null) throw new IllegalArgumentException("Outputdir is not defined") output_narrow = new File(outputdir + name.get + ".narrowPeak") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala index a48cd8cfe611e854202f137b2b1dbe5ee2cd295f..4aa970f636ddbbb7216abce5d96ebbbebc8a7598 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala @@ -58,7 +58,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard { } object CalculateHsMetrics { - def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File, outputDir: String): CalculateHsMetrics = { + def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File, outputDir: File): CalculateHsMetrics = { val calculateHsMetrics = new CalculateHsMetrics(root) calculateHsMetrics.input = input calculateHsMetrics.baitIntervals = baitIntervals diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index b2553c47e2bf05bc14dd87f667cf80d0165181ce..6606d9e41acc4d5c2b785f905d844c2f91676b72 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -62,7 +62,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard { } object CollectAlignmentSummaryMetrics { - def apply(root: Configurable, input: File, outputDir: String): 
CollectAlignmentSummaryMetrics = { + def apply(root: Configurable, input: File, outputDir: File): CollectAlignmentSummaryMetrics = { val collectAlignmentSummaryMetrics = new CollectAlignmentSummaryMetrics(root) collectAlignmentSummaryMetrics.input = input collectAlignmentSummaryMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".alignmentMetrics") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 9cab9bfc90298f243f6cfdc8e051e7d451a4cf90..86198d2a344489a62b546f5d9c0dd4a8234c929a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -49,7 +49,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard { @Argument(doc = "IS_BISULFITE_SEQUENCED", required = false) var isBisulfiteSequinced: Option[Boolean] = config("isbisulfitesequinced") - override def afterGraph { + override def beforeGraph { if (outputChart == null) outputChart = new File(output + ".pdf") //require(reference.exists) } @@ -67,7 +67,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard { } object CollectGcBiasMetrics { - def apply(root: Configurable, input: File, outputDir: String): CollectGcBiasMetrics = { + def apply(root: Configurable, input: File, outputDir: File): CollectGcBiasMetrics = { val collectGcBiasMetrics = new CollectGcBiasMetrics(root) collectGcBiasMetrics.input :+= input collectGcBiasMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index 7d7c7ddc2e773601e772ea3e1fc744f97059f5cb..9b1777677a9567d8e88d21f44ec7f913e45341b5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -54,7 +54,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard { @Argument(doc = "HISTOGRAM_WIDTH", required = false) var histogramWidth: Option[Int] = config("histogramWidth") - override def afterGraph { + override def beforeGraph { if (outputHistogram == null) outputHistogram = new File(output + ".pdf") //require(reference.exists) } @@ -72,7 +72,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard { } object CollectInsertSizeMetrics { - def apply(root: Configurable, input: File, outputDir: String): CollectInsertSizeMetrics = { + def apply(root: Configurable, input: File, outputDir: File): CollectInsertSizeMetrics = { val collectInsertSizeMetrics = new CollectInsertSizeMetrics(root) collectInsertSizeMetrics.input = input collectInsertSizeMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".insertsizemetrics") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala index 6df04c12ccb0ad1ca1c4853dac940ec3a304d043..181494ae12235f2fc95bade2098a9d313f6309b8 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala @@ -70,8 +70,8 @@ class MarkDuplicates(val root: Configurable) extends Picard { @Output(doc = "Bam Index", required = true) private var outputIndex: File = _ - 
override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph if (createIndex) outputIndex = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala index 4de143498d9812502a2141facfacc0bbf2b046d5..8feaf782ac57aba5a6779e515fe2c6ce452f5871 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala @@ -53,7 +53,7 @@ class MergeSamFiles(val root: Configurable) extends Picard { } object MergeSamFiles { - def apply(root: Configurable, input: List[File], outputDir: String, sortOrder: String = null): MergeSamFiles = { + def apply(root: Configurable, input: List[File], outputDir: File, sortOrder: String = null): MergeSamFiles = { val mergeSamFiles = new MergeSamFiles(root) mergeSamFiles.input = input mergeSamFiles.output = new File(outputDir, input.head.getName.stripSuffix(".bam").stripSuffix(".sam") + ".merge.bam") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala index 56cfec1dd668c5a4c3bb349276e0fcf25729a040..ef7ec250d5de2bd88c36e48af157ae29bce0a3b0 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SortSam.scala @@ -34,8 +34,8 @@ class SortSam(val root: Configurable) extends Picard { @Output(doc = "Bam Index", required = true) private var outputIndex: File = _ - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph if (createIndex) outputIndex = new 
File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala index d349b8d1d944a96332e5a01b686271dfd88e36ca..0a86da49ddd2f0c8eb20d6707b6c25e3d33d0c89 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala @@ -35,7 +35,7 @@ class SquishBed(val root: Configurable) extends PythonCommandLineFunction { } object SquishBed { - def apply(root: Configurable, input: File, outputDir: String): SquishBed = { + def apply(root: Configurable, input: File, outputDir: File): SquishBed = { val squishBed = new SquishBed(root) squishBed.input = input squishBed.output = new File(outputDir, input.getName.stripSuffix(".bed") + ".squish.bed") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala index aaa2797b65930d197089aaa72d90dd5e4d9382e4..d5f4ad2112326ac36095c8fc5b11979a41e6406d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala @@ -51,6 +51,8 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction { @Output(doc = "Sync statistics", required = true) var outputStats: File = _ + override val defaultVmem = "5G" + // executed command line override def commandLine = super.commandLine + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala index c431aa0dac3a5a5a39c8ab676a704ac3645a4ad9..3eec803c8dab66d824a86a71f794d691ebde2ba6 100644 --- 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala @@ -49,8 +49,8 @@ class MergeAlleles(val root: Configurable) extends BiopetJavaCommandLineFunction override val defaultVmem = "8G" memoryLimit = Option(4) - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index ae21c34e4b820ebfb0b33e3b033196e03a58ef6e..3e9743fb16e7c1d9228307a22ca950ee8f18eb0d 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -54,8 +54,8 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction override def defaults = ConfigUtils.mergeMaps(Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)), super.defaults) - override def afterGraph { - super.afterGraph + override def beforeGraph { + super.beforeGraph val samtoolsMpileup = new SamtoolsMpileup(this) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala index 11d930d5972897824e341d22d46808d849d7767b..81a0b83c3c3555aa7108d65821cafb94371e186c 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala @@ -21,15 +21,15 @@ class VcfStats(val root: Configurable) extends BiopetJavaCommandLineFunction { 
@Input(doc = "Input fastq", shortName = "I", required = true) var input: File = _ - protected var outputDir: String = _ + protected var outputDir: File = _ /** * Set output dir and a output file * @param dir */ - def setOutputDir(dir: String): Unit = { + def setOutputDir(dir: File): Unit = { outputDir = dir - this.jobOutputFile = new File(dir + File.separator + ".vcfstats.out") + this.jobOutputFile = new File(dir, ".vcfstats.out") } /** diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala index 2c9165036824f135f9552b69fa3b4085be25fc93..2c9ae7e6a87b977001ff94668db1d768511b378f 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala @@ -33,7 +33,8 @@ object ConfigUtils extends Logging { * @param map2 Backup for map1 * @return merged map */ - def mergeMaps(map1: Map[String, Any], map2: Map[String, Any]): Map[String, Any] = { + def mergeMaps(map1: Map[String, Any], map2: Map[String, Any], + resolveConflict: (Any, Any, String) => Any = (m1, m2, key) => m1): Map[String, Any] = { var newMap: Map[String, Any] = Map() for (key <- map1.keySet.++(map2.keySet)) { if (!map2.contains(key)) newMap += (key -> map1(key)) @@ -46,7 +47,7 @@ object ConfigUtils extends Logging { case _ => newMap += (key -> map1(key)) } } - case _ => newMap += (key -> map1(key)) + case _ => newMap += (key -> resolveConflict(map1(key), map2(key), key)) } } } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala index b7353b2ad1d4a91cd05340edbd7a61a51e64e266..0adc3a09b554ea489c9e0c10e0f25d7d785a3bdd 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala +++ 
b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/core/config/ConfigTest.scala @@ -1,9 +1,7 @@ package nl.lumc.sasc.biopet.core.config -import nl.lumc.sasc.biopet.utils.ConfigUtils._ import nl.lumc.sasc.biopet.utils.{ ConfigUtilsTest, ConfigUtils } import org.scalatest.Matchers -import org.scalatest.mock.MockitoSugar import org.scalatest.testng.TestNGSuite import org.testng.annotations.{ DataProvider, Test } diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala index ddcf887d819eb0edc09bfc5b6a42954115e62192..3a53f91f0462a27d81d3dd83a449f94c1d31d514 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/utils/ConfigUtilsTest.scala @@ -4,10 +4,8 @@ import java.io.{ PrintWriter, File } import argonaut.Argonaut._ import argonaut.Json -import nl.lumc.sasc.biopet.core.BiopetQScript import nl.lumc.sasc.biopet.core.config.{ ConfigValueIndex, ConfigValue } import org.scalatest.Matchers -import org.scalatest.mock.MockitoSugar import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test diff --git a/public/biopet-public-package/src/test/resources/log4j.properties b/public/biopet-public-package/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/biopet-public-package/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. 
+# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/carp/pom.xml b/public/carp/pom.xml index 482b83147312cf332cc7325c6a9196c092dcd91a..e6e0f86fc2bfed50f8361ccd18f450c84f3b2158 100644 --- a/public/carp/pom.xml +++ b/public/carp/pom.xml @@ -44,5 +44,17 @@ <artifactId>Mapping</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.11</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index 763b7b9c616e5b301735e6ef387a30df6f510526..6868704b3d35dc8ef9fde30a02fdb009cd9c74a8 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -39,7 +39,7 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) override def defaults =
ConfigUtils.mergeMaps(Map( - "mapping" -> Map("skip_markduplicates" -> true) + "mapping" -> Map("skip_markduplicates" -> true, "aligner" -> "bwa") ), super.defaults) def makeSample(id: String) = new Sample(id) diff --git a/public/carp/src/test/resources/log4j.properties b/public/carp/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/carp/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala b/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..da63e765dd64f39af329973c1ca513ba659f7f1d --- /dev/null +++ b/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala @@ -0,0 +1,139 @@ +package nl.lumc.sasc.biopet.pipelines.carp + +import java.io.File + +import nl.lumc.sasc.biopet.core.config.Config +import nl.lumc.sasc.biopet.extensions.bwa.BwaMem +import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak +import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, SortSam } +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QSettings +import org.testng.annotations.{ Test, DataProvider } +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite + +/** + * Created by pjvan_thof on 2/13/15. 
+ */ +class CarpTest extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Carp = { + new Carp() { + override def configName = "carp" + override def globalConfig = new Config(map) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "carpOptions", parallel = true) + def carpOptions = { + val bool = Array(true, false) + + for (s1 <- bool; s2 <- bool; s3 <- bool; t <- bool; c <- bool) yield Array("", s1, s2, s3, t, c) + } + + @Test(dataProvider = "carpOptions") + def testCarp(f: String, sample1: Boolean, sample2: Boolean, sample3: Boolean, threatment: Boolean, control: Boolean): Unit = { + val map = { + var m = ConfigUtils.mergeMaps(Map("output_dir" -> CarpTest.outputDir + ), CarpTest.excutables) + + if (sample1) m = ConfigUtils.mergeMaps(CarpTest.sample1, m.toMap) + if (sample2) m = ConfigUtils.mergeMaps(CarpTest.sample2, m.toMap) + if (sample3) m = ConfigUtils.mergeMaps(CarpTest.sample3, m.toMap) + if (threatment) m = ConfigUtils.mergeMaps(CarpTest.threatment1, m.toMap) + if (control) m = ConfigUtils.mergeMaps(CarpTest.control1, m.toMap) + m + } + + if (!sample1 && !sample2 && !sample3 && !threatment && !control) { // When no samples + intercept[IllegalArgumentException] { + initPipeline(map).script() + } + } else if (threatment && !control) { // If control of a samples does not exist in samples + intercept[IllegalStateException] { + initPipeline(map).script() + } + } else { // When samples are correct + val carp = initPipeline(map) + carp.script() + val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 2 else 0) + + (if (threatment) 1 else 0) + (if (control) 1 else 0) + val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) + + (if (threatment) 1 else 0) + (if (control) 1 else 0) + + carp.functions.count(_.isInstanceOf[BwaMem]) shouldBe numberLibs + carp.functions.count(_.isInstanceOf[SortSam]) shouldBe numberLibs + 
carp.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (if (sample3) 1 else 0) + + carp.functions.count(_.isInstanceOf[Macs2CallPeak]) shouldBe (numberSamples + (if (threatment) 1 else 0)) + } + } +} + +object CarpTest { + val outputDir = System.getProperty("java.io.tmpdir") + File.separator + "carp" + + val excutables = Map( + "reference" -> "test", + "seqstat" -> Map("exe" -> "test"), + "fastqc" -> Map("exe" -> "test"), + "seqtk" -> Map("exe" -> "test"), + "sickle" -> Map("exe" -> "test"), + "bwa" -> Map("exe" -> "test"), + "samtools" -> Map("exe" -> "test"), + "macs2" -> Map("exe" -> "test"), + "igvtools" -> Map("exe" -> "test"), + "wigtobigwig" -> Map("exe" -> "test") + ) + + val sample1 = Map( + "samples" -> Map("sample1" -> Map("libraries" -> Map( + "lib1" -> Map( + "R1" -> "1_1_R1.fq", + "R2" -> "1_1_R2.fq" + ) + ) + ))) + + val sample2 = Map( + "samples" -> Map("sample2" -> Map("libraries" -> Map( + "lib1" -> Map( + "R1" -> "2_1_R1.fq", + "R2" -> "2_1_R2.fq" + ) + ) + ))) + + val sample3 = Map( + "samples" -> Map("sample3" -> Map("libraries" -> Map( + "lib1" -> Map( + "R1" -> "3_1_R1.fq", + "R2" -> "3_1_R2.fq" + ), + "lib2" -> Map( + "R1" -> "3_2_R1.fq", + "R2" -> "3_2_R2.fq" + ) + ) + ))) + + val threatment1 = Map( + "samples" -> Map("threatment" -> Map("control" -> "control1", "libraries" -> Map( + "lib1" -> Map( + "R1" -> "threatment_1_R1.fq", + "R2" -> "threatment_1_R2.fq" + ) + ) + ))) + + val control1 = Map( + "samples" -> Map("control1" -> Map("libraries" -> Map( + "lib1" -> Map( + "R1" -> "control_1_R1.fq", + "R2" -> "control_1_R2.fq" + ) + ) + ))) + +} \ No newline at end of file diff --git a/public/flexiprep/Flexiprep.iml b/public/flexiprep/Flexiprep.iml index 54fb966499dc08774dd77641ac8d9286ecddbc96..791476d636c79826a9b7ef3f4cd4ca2d196a28bb 100644 --- a/public/flexiprep/Flexiprep.iml +++ b/public/flexiprep/Flexiprep.iml @@ -6,6 +6,8 @@ <content url="file://$MODULE_DIR$"> <sourceFolder url="file://$MODULE_DIR$/src/main/resources"
type="java-resource" /> <sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" /> + <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" /> + <sourceFolder url="file://$MODULE_DIR$/src/test/scala" isTestSource="true" /> <excludeFolder url="file://$MODULE_DIR$/target" /> </content> <orderEntry type="inheritedJdk" /> @@ -16,7 +18,6 @@ <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-tools-public:3.3" level="project" /> <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-engine:3.3" level="project" /> <orderEntry type="library" name="Maven: org.scala-lang:scala-compiler:2.10.2" level="project" /> - <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.10.2" level="project" /> <orderEntry type="library" name="Maven: log4j:log4j:1.2.15" level="project" /> <orderEntry type="library" name="Maven: net.sf.jgrapht:jgrapht:0.8.3" level="project" /> <orderEntry type="library" name="Maven: org.apache.commons:commons-email:1.2" level="project" /> @@ -82,11 +83,19 @@ <orderEntry type="library" name="Maven: io.argonaut:argonaut_2.11:6.1-M4" level="project" /> <orderEntry type="library" name="Maven: org.scalaz:scalaz-core_2.11:7.1.0" level="project" /> <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.2" level="project" /> - <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" /> <orderEntry type="library" name="Maven: com.github.julien-truffaut:monocle-core_2.11:0.5.0" level="project" /> <orderEntry type="library" name="Maven: org.biojava:biojava3-core:3.1.0" level="project" /> <orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" /> <orderEntry type="library" name="Maven: com.google.guava:guava:18.0" level="project" /> <orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" /> + <orderEntry type="library" 
scope="TEST" name="Maven: org.testng:testng:6.8" level="project" /> + <orderEntry type="library" name="Maven: junit:junit:4.10" level="project" /> + <orderEntry type="library" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" /> + <orderEntry type="library" scope="TEST" name="Maven: org.beanshell:bsh:2.0b4" level="project" /> + <orderEntry type="library" scope="TEST" name="Maven: com.beust:jcommander:1.27" level="project" /> + <orderEntry type="library" scope="TEST" name="Maven: org.yaml:snakeyaml:1.6" level="project" /> + <orderEntry type="library" scope="TEST" name="Maven: org.scalatest:scalatest_2.11:2.2.1" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.11.2" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" /> </component> </module> \ No newline at end of file diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index b9574333ef16d877eb50c4e85d9c5173dc888426..3233b3a731eff74aa9882dc28e60654b3bc26efa 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -170,7 +170,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r object Fastqc { - def apply(root: Configurable, fastqfile: File, outDir: String): Fastqc = { + def apply(root: Configurable, fastqfile: File, outDir: File): Fastqc = { val fastqcCommand = new Fastqc(root) fastqcCommand.fastqfile = fastqfile var filename: String = fastqfile.getName() @@ -178,8 +178,8 @@ object Fastqc { if (filename.endsWith(".gzip")) filename = filename.substring(0, filename.size - 5) if (filename.endsWith(".fastq")) filename = filename.substring(0, filename.size - 6) //if (filename.endsWith(".fq")) filename = 
filename.substring(0,filename.size - 3) - fastqcCommand.output = new File(outDir + "/" + filename + "_fastqc.zip") - fastqcCommand.afterGraph + fastqcCommand.output = new File(outDir, filename + "_fastqc.zip") + fastqcCommand.beforeGraph fastqcCommand } } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 531ed8bbb219da0327795eb295dab842531abf85..0d8c8ee51ccc7d4c7c5b7d84cdc66d5135b223e8 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -90,7 +90,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with case _ => } - summary.out = outputDir + sampleId + "-" + libId + ".qc.summary.json" + summary.out = new File(outputDir, sampleId + "-" + libId + ".qc.summary.json") } def biopetScript() { @@ -108,7 +108,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with outputFiles += ("fastq_input_R1" -> extractIfNeeded(input_R1, outputDir)) if (paired) outputFiles += ("fastq_input_R2" -> extractIfNeeded(input_R2.get, outputDir)) - fastqc_R1 = Fastqc(this, input_R1, outputDir + "/" + R1_name + ".fastqc/") + fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/")) add(fastqc_R1) summary.addFastqc(fastqc_R1) outputFiles += ("fastqc_R1" -> fastqc_R1.output) @@ -118,7 +118,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with summary.addMd5sum(md5sum_R1, R2 = false, after = false) if (paired) { - fastqc_R2 = Fastqc(this, input_R2.get, outputDir + "/" + R2_name + ".fastqc/") + fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/")) add(fastqc_R2) summary.addFastqc(fastqc_R2, R2 = true) outputFiles += ("fastqc_R2" -> fastqc_R2.output) @@ -129,16 +129,16 @@ class 
Flexiprep(val root: Configurable) extends QScript with BiopetQScript with } } - def runTrimClip(R1_in: File, outDir: String, chunk: String): (File, File, List[File]) = { + def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, File, List[File]) = { runTrimClip(R1_in, new File(""), outDir, chunk) } - def runTrimClip(R1_in: File, outDir: String): (File, File, List[File]) = { + def runTrimClip(R1_in: File, outDir: File): (File, File, List[File]) = { runTrimClip(R1_in, new File(""), outDir, "") } - def runTrimClip(R1_in: File, R2_in: File, outDir: String): (File, File, List[File]) = { + def runTrimClip(R1_in: File, R2_in: File, outDir: File): (File, File, List[File]) = { runTrimClip(R1_in, R2_in, outDir, "") } - def runTrimClip(R1_in: File, R2_in: File, outDir: String, chunkarg: String): (File, File, List[File]) = { + def runTrimClip(R1_in: File, R2_in: File, outDir: File, chunkarg: String): (File, File, List[File]) = { val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_" var results: Map[String, File] = Map() @@ -248,8 +248,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with def runFinalize(fastq_R1: List[File], fastq_R2: List[File]) { if (fastq_R1.length != fastq_R2.length && paired) throw new IllegalStateException("R1 and R2 file number is not the same") - val R1 = new File(outputDir + R1_name + ".qc" + R1_ext + ".gz") - val R2 = new File(outputDir + R2_name + ".qc" + R2_ext + ".gz") + val R1 = new File(outputDir, R1_name + ".qc" + R1_ext + ".gz") + val R2 = new File(outputDir, R2_name + ".qc" + R2_ext + ".gz") add(Gzip(this, fastq_R1, R1)) if (paired) add(Gzip(this, fastq_R2, R2)) @@ -266,12 +266,12 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with add(md5sum_R2) summary.addMd5sum(md5sum_R2, R2 = true, after = true) } - fastqc_R1_after = Fastqc(this, R1, outputDir + "/" + R1_name + ".qc.fastqc/") + fastqc_R1_after = Fastqc(this, R1, new File(outputDir, 
R1_name + ".qc.fastqc/")) addSummarizable(fastqc_R1_after) add(fastqc_R1_after) summary.addFastqc(fastqc_R1_after, after = true) if (paired) { - fastqc_R2_after = Fastqc(this, R2, outputDir + "/" + R2_name + ".qc.fastqc/") + fastqc_R2_after = Fastqc(this, R2, new File(outputDir, R2_name + ".qc.fastqc/")) add(fastqc_R2_after) summary.addFastqc(fastqc_R2_after, R2 = true, after = true) } @@ -281,7 +281,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with addSummaryJobs } - def extractIfNeeded(file: File, runDir: String): File = { + def extractIfNeeded(file: File, runDir: File): File = { if (file == null) return file else if (file.getName().endsWith(".gz") || file.getName().endsWith(".gzip")) { var newFile: File = swapExt(runDir, file, ".gz", "") diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala index 05236e13cacae24e5b5dffb408e1aa01b1c297e4..03144feee45b395504864abfadff03e50713429d 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala @@ -38,7 +38,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk. } } - override def afterGraph { + override def beforeGraph { if (fastqc != null) deps ::= fastqc.output } diff --git a/public/flexiprep/src/test/resources/log4j.properties b/public/flexiprep/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/flexiprep/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..1fcaab37ba5f1518dc03650cf35568bf44667ce5 --- /dev/null +++ b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala @@ -0,0 +1,80 @@ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import java.io.File + +import nl.lumc.sasc.biopet.core.config.Config +import nl.lumc.sasc.biopet.extensions.{ Gzip, Zcat } +import nl.lumc.sasc.biopet.tools.FastqSync +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QSettings +import org.broadinstitute.gatk.queue.function.QFunction +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.{ DataProvider, Test } + +/** + * Created by pjvan_thof on 2/11/15.
+ */ +class FlexiprepTest extends TestNGSuite with Matchers { + + def initPipeline(map: Map[String, Any]): Flexiprep = { + new Flexiprep() { + override def configName = "flexiprep" + override def globalConfig = new Config(map) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "flexiprepOptions", parallel = true) + def flexiprepOptions = { + val paired = Array(true, false) + val skipTrims = Array(true, false) + val skipClips = Array(true, false) + val zipped = Array(true, false) + + for ( + pair <- paired; + skipTrim <- skipTrims; + skipClip <- skipClips; + zip <- zipped + ) yield Array("", pair, skipTrim, skipClip, zip) + } + + @Test(dataProvider = "flexiprepOptions") + def testFlexiprep(f: String, paired: Boolean, skipTrim: Boolean, skipClip: Boolean, zipped: Boolean): Unit = { + val map = ConfigUtils.mergeMaps(Map("output_dir" -> FlexiprepTest.outputDir, + "skip_trim" -> skipTrim, + "skip_clip" -> skipClip + ), Map(FlexiprepTest.excutables.toSeq: _*)) + val flexiprep: Flexiprep = initPipeline(map) + + flexiprep.input_R1 = new File(flexiprep.outputDir, "bla_R1.fq" + (if (zipped) ".gz" else "")) + if (paired) flexiprep.input_R2 = Some(new File(flexiprep.outputDir, "bla_R2.fq" + (if (zipped) ".gz" else ""))) + flexiprep.sampleId = "1" + flexiprep.libId = "1" + flexiprep.script() + + flexiprep.functions.count(_.isInstanceOf[Fastqc]) shouldBe ( + if (paired && (skipClip && skipTrim)) 2 + else if (!paired && (skipClip && skipTrim)) 1 + else if (paired && !(skipClip && skipTrim)) 4 + else 2) + flexiprep.functions.count(_.isInstanceOf[Zcat]) shouldBe (if (zipped) (if (paired) 2 else 1) else 0) + flexiprep.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe (if (paired) 2 else 1) + flexiprep.functions.count(_.isInstanceOf[Cutadapt]) shouldBe (if (skipClip) 0 else (if (paired) 2 else 1)) + flexiprep.functions.count(_.isInstanceOf[FastqSync]) shouldBe (if (skipClip) 0 else (if (paired) 1 else 0)) +
flexiprep.functions.count(_.isInstanceOf[Sickle]) shouldBe (if (skipTrim) 0 else 1) + flexiprep.functions.count(_.isInstanceOf[Gzip]) shouldBe (if (paired) 2 else 1) + } +} +object FlexiprepTest { + val outputDir = System.getProperty("java.io.tmpdir") + File.separator + "flexiprep" + + val excutables = Map( + "seqstat" -> Map("exe" -> "test"), + "fastqc" -> Map("exe" -> "test"), + "seqtk" -> Map("exe" -> "test"), + "sickle" -> Map("exe" -> "test") + ) +} \ No newline at end of file diff --git a/public/gentrap/src/test/resources/log4j.properties b/public/gentrap/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/gentrap/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/kopisu/src/test/resources/log4j.properties b/public/kopisu/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/kopisu/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/mapping/pom.xml b/public/mapping/pom.xml index ef2604bb240a7863896ecdc1fb4a49a138c748c7..71d0de73f7a4e6cda61bd668628b19776e929fe9 100644 --- a/public/mapping/pom.xml +++ b/public/mapping/pom.xml @@ -49,6 +49,18 @@ <artifactId>BamMetrics</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.11</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> <dependency> <groupId>nl.lumc.sasc</groupId> <artifactId>Bam2Wig</artifactId> diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 57a64f51fac807a83c6da86c99466b2e2f43d057..e6e483ab4b24cf2146c23946d12932f41021b269 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -97,7 +97,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { protected var paired: Boolean = false val flexiprep = new Flexiprep(this) - def finalBamFile: File = outputDir + outputName + ".final.bam" + def finalBamFile: File = new File(outputDir, outputName + ".final.bam") def init() { require(outputDir != null, "Missing output directory on mapping module") @@ -128,7 +128,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { def biopetScript() { if (!skipFlexiprep) { - flexiprep.outputDir = outputDir + "flexiprep" + File.separator + flexiprep.outputDir = new File(outputDir, "flexiprep") flexiprep.input_R1 = input_R1 
flexiprep.input_R2 = input_R2 flexiprep.sampleId = this.sampleId @@ -145,9 +145,9 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip")) else return file } - var chunks: Map[String, (String, String)] = Map() + var chunks: Map[File, (String, String)] = Map() if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) { - val chunkDir = outputDir + "chunks/" + t + "/" + val chunkDir = new File(outputDir, "chunks" + File.separator + t) - chunks += (chunkDir -> (removeGz(chunkDir + input_R1.getName), - if (paired) removeGz(chunkDir + input_R2.get.getName) else "")) + chunks += (chunkDir -> (removeGz(new File(chunkDir, input_R1.getName).getPath), + if (paired) removeGz(new File(chunkDir, input_R2.get.getName).getPath) else "")) } @@ -177,7 +177,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { var R2 = fastqfile._2 var deps: List[File] = Nil if (!skipFlexiprep) { - val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunkDir) + val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir) logger.debug(chunkDir + " - " + flexiout) R1 = flexiout._1 if (paired) R2 = flexiout._2 @@ -186,7 +186,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { fastq_R2_output :+= R2 } - val outputBam = new File(chunkDir + outputName + ".bam") + val outputBam = new File(chunkDir, outputName + ".bam") bamFiles :+= outputBam aligner match { case "bwa" => addBwaMem(R1, R2, outputBam, deps) @@ -198,7 +198,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { case _ => throw new IllegalStateException("Option Aligner: '" + aligner + "' is not valid") } if (config("chunk_metrics", default = false)) - addAll(BamMetrics(this, outputBam, chunkDir + "metrics/").functions) + addAll(BamMetrics(this, outputBam, new File(chunkDir, "metrics")).functions) } if (!skipFlexiprep) { flexiprep.runFinalize(fastq_R1_output, fastq_R2_output) @@ -207,7 +207,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { var bamFile =
bamFiles.head if (!skipMarkduplicates) { - bamFile = new File(outputDir + outputName + ".dedup.bam") + bamFile = new File(outputDir, outputName + ".dedup.bam") add(MarkDuplicates(this, bamFiles, bamFile)) } else if (skipMarkduplicates && chunking) { val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir) @@ -215,7 +215,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { bamFile = mergeSamFile.output } - if (!skipMetrics) addAll(BamMetrics(this, bamFile, outputDir + "metrics" + File.separator).functions) + if (!skipMetrics) addAll(BamMetrics(this, bamFile, new File(outputDir, "metrics")).functions) add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai"))) add(Ln(this, bamFile, finalBamFile)) diff --git a/public/mapping/src/test/resources/log4j.properties b/public/mapping/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/mapping/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..43085064c1c51404b5c872079d3d780457ce0475 --- /dev/null +++ b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala @@ -0,0 +1,114 @@ +package nl.lumc.sasc.biopet.pipelines.mapping + +import java.io.File + +import nl.lumc.sasc.biopet.core.config.Config +import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem } +import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, AddOrReplaceReadGroups, MarkDuplicates, SortSam } +import nl.lumc.sasc.biopet.extensions._ +import nl.lumc.sasc.biopet.pipelines.flexiprep.Cutadapt +import nl.lumc.sasc.biopet.pipelines.flexiprep.Fastqc +import nl.lumc.sasc.biopet.pipelines.flexiprep.Sickle +import nl.lumc.sasc.biopet.pipelines.flexiprep._ +import nl.lumc.sasc.biopet.tools.FastqSync +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QSettings +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.{ DataProvider, Test } + +/** + * Created by pjvan_thof on 2/12/15. 
+ */ +class MappingTest extends TestNGSuite with Matchers { + def initPipeline(map: Map[String, Any]): Mapping = { + new Mapping { + override def configName = "mapping" + override def globalConfig = new Config(map) + qSettings = new QSettings + qSettings.runName = "test" + } + } + + @DataProvider(name = "mappingOptions", parallel = true) + def mappingOptions = { + val aligners = Array("bwa", "bwa-aln", "star", "star-2pass", "bowtie", "stampy") + val paired = Array(true, false) + val chunks = Array(1, 5, 10, 100) + val skipMarkDuplicates = Array(true, false) + val skipFlexipreps = Array(true, false) + + for ( + aligner <- aligners; + pair <- paired; + chunk <- chunks; + skipMarkDuplicate <- skipMarkDuplicates; + skipFlexiprep <- skipFlexipreps + ) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep) + } + + @Test(dataProvider = "mappingOptions") + def testMapping(aligner: String, paired: Boolean, chunks: Int, skipMarkDuplicate: Boolean, skipFlexiprep: Boolean) = { + val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir, + "aligner" -> aligner, + "number_chunks" -> chunks, + "skip_markduplicates" -> skipMarkDuplicate, + "skip_flexiprep" -> skipFlexiprep + ), Map(MappingTest.excutables.toSeq: _*)) + val mapping: Mapping = initPipeline(map) + + mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") + if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + mapping.sampleId = "1" + mapping.libId = "1" + mapping.script() + + //Flexiprep + mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2) + mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe 0 + mapping.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks) + mapping.functions.count(_.isInstanceOf[Cutadapt]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks) + mapping.functions.count(_.isInstanceOf[FastqSync]) shouldBe ((if 
(skipFlexiprep) 0 else if (paired && !skipFlexiprep) 1 else 0) * chunks) + mapping.functions.count(_.isInstanceOf[Sickle]) shouldBe ((if (skipFlexiprep) 0 else 1) * chunks) + mapping.functions.count(_.isInstanceOf[Gzip]) shouldBe (if (skipFlexiprep) 0 else if (paired) 2 else 1) + + //aligners + mapping.functions.count(_.isInstanceOf[BwaMem]) shouldBe ((if (aligner == "bwa") 1 else 0) * chunks) + mapping.functions.count(_.isInstanceOf[BwaAln]) shouldBe ((if (aligner == "bwa-aln") (if (paired) 2 else 1) else 0) * chunks) + mapping.functions.count(_.isInstanceOf[BwaSampe]) shouldBe ((if (aligner == "bwa-aln") (if (paired) 1 else 0) else 0) * chunks) + mapping.functions.count(_.isInstanceOf[BwaSamse]) shouldBe ((if (aligner == "bwa-aln") (if (paired) 0 else 1) else 0) * chunks) + mapping.functions.count(_.isInstanceOf[Star]) shouldBe ((if (aligner == "star") 1 else if (aligner == "star-2pass") 3 else 0) * chunks) + mapping.functions.count(_.isInstanceOf[Bowtie]) shouldBe ((if (aligner == "bowtie") 1 else 0) * chunks) + mapping.functions.count(_.isInstanceOf[Stampy]) shouldBe ((if (aligner == "stampy") 1 else 0) * chunks) + + // Sort sam or replace readgroup + val sort = aligner match { + case "bwa" | "bwa-aln" | "stampy" => "sortsam" + case "star" | "star-2pass" | "bowtie" => "replacereadgroups" + case _ => throw new IllegalArgumentException("aligner: " + aligner + " does not exist") + } + + mapping.functions.count(_.isInstanceOf[SortSam]) shouldBe ((if (sort == "sortsam") 1 else 0) * chunks) + mapping.functions.count(_.isInstanceOf[AddOrReplaceReadGroups]) shouldBe ((if (sort == "replacereadgroups") 1 else 0) * chunks) + + mapping.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (if (skipMarkDuplicate && chunks > 1) 1 else 0) + mapping.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (if (skipMarkDuplicate) 0 else 1) + } +} + +object MappingTest { + val outputDir = System.getProperty("java.io.tmpdir") + File.separator + "flexiprep" + + val excutables = 
Map( + "reference" -> "test", + "seqstat" -> Map("exe" -> "test"), + "fastqc" -> Map("exe" -> "test"), + "seqtk" -> Map("exe" -> "test"), + "sickle" -> Map("exe" -> "test"), + "bwa" -> Map("exe" -> "test"), + "star" -> Map("exe" -> "test"), + "bowtie" -> Map("exe" -> "test"), + "stampy" -> Map("exe" -> "test", "genome" -> "test", "hash" -> "test"), + "samtools" -> Map("exe" -> "test") + ) +} \ No newline at end of file diff --git a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index 77dba4b1469eba2d75bedafc9f53be7c23acb557..f510bf8e5756bf2ffc18460339b7580d2e2ccdf6 100644 --- a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -15,6 +15,8 @@ */ package nl.lumc.sasc.biopet.pipelines.sage +import java.io.File + import nl.lumc.sasc.biopet.core.{ BiopetQScript, MultiSampleQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.Cat @@ -72,7 +74,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { mapping.sampleId = sampleId protected def addJobs(): Unit = { - flexiprep.outputDir = libDir + "flexiprep/" + flexiprep.outputDir = new File(libDir, "flexiprep/") flexiprep.input_R1 = inputFastq flexiprep.init flexiprep.biopetScript @@ -123,7 +125,6 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { } def init() { - if (!outputDir.endsWith("/")) outputDir += "/" if (transcriptome.isEmpty && tagsLibrary.isEmpty) throw new IllegalStateException("No transcriptome or taglib found") if (countBed.isEmpty) @@ -138,10 +139,10 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { if (tagsLibrary.isEmpty) { val cdl = new SageCreateLibrary(this) cdl.input = transcriptome.get - cdl.output = outputDir + "taglib/tag.lib" - cdl.noAntiTagsOutput = 
outputDir + "taglib/no_antisense_genes.txt" - cdl.noTagsOutput = outputDir + "taglib/no_sense_genes.txt" - cdl.allGenesOutput = outputDir + "taglib/all_genes.txt" + cdl.output = new File(outputDir, "taglib/tag.lib") + cdl.noAntiTagsOutput = new File(outputDir, "taglib/no_antisense_genes.txt") + cdl.noTagsOutput = new File(outputDir, "taglib/no_sense_genes.txt") + cdl.allGenesOutput = new File(outputDir, "taglib/all_genes.txt") add(cdl) tagsLibrary = Some(cdl.output) } diff --git a/public/sage/src/test/resources/log4j.properties b/public/sage/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/sage/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file diff --git a/public/yamsvp/src/test/resources/log4j.properties b/public/yamsvp/src/test/resources/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..501af67582a546db584c8538b28cb6f9e07f1692 --- /dev/null +++ b/public/yamsvp/src/test/resources/log4j.properties @@ -0,0 +1,25 @@ +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + +# Set root logger level to ERROR and its only appender to A1. +log4j.rootLogger=ERROR, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file