diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala index c773de6155b5a771f242dbbe83a4a21f98089eaa..0e4a29378a8a4334eeed795198c117431bdee8e2 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala @@ -15,13 +15,9 @@ */ package nl.lumc.sasc.biopet.core.extensions -import java.io.{ File, FileOutputStream } - import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.utils.rscript.Rscript -import scala.sys.process._ - /** * General rscript extension * diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 02c860fdb1719c8c4635d467bea752b74745f9b2..bb6e1bf5606f21e99ceb7d557a93b68b690c0c3d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -184,6 +184,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config map.toMap } } + object WriteSummary { /** Retrive checksum from file */ def parseChecksum(checksumFile: File): String = { diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala index 31b70db9d1595f2dd5614c9abe4a5fa1047ed834..79741a2c2eb73a39cce67d8ad48b5680ba939163 100644 --- a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala @@ -19,6 +19,7 @@ class ReferenceTest extends 
TestNGSuite with Matchers with MockitoSugar { @Test def testDefault: Unit = { + Logging.errors.clear() make(config :: testReferenceNoIndex :: Nil).referenceFasta() Logging.checkErrors(true) diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..1be487ff6d1309685d004d4b2f8f8b5523a93d75 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala @@ -0,0 +1,35 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.File + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 14/01/16. + */ +class SummarizableTest extends TestNGSuite with Matchers { + /** Expects the default resolveSummaryConflict to throw an IllegalStateException */ + @Test + def testDefaultMerge: Unit = { + val summarizable = new Summarizable { + def summaryFiles: Map[String, File] = ??? + def summaryStats: Any = ??? + } + intercept[IllegalStateException] { + summarizable.resolveSummaryConflict("1", "1", "key") + } + } + + /** Expects an overriding resolveSummaryConflict to be used instead of the default */ + @Test + def testOverrideMerge: Unit = { + val summarizable = new Summarizable { + def summaryFiles: Map[String, File] = ??? + def summaryStats: Any = ??? 
+ override def resolveSummaryConflict(v1: Any, v2: Any, key: String) = v1 + } + summarizable.resolveSummaryConflict("1", "1", "key") shouldBe "1" + } +} diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..a53fe068f9537cdce91a75f45ef9393631377d0d --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala @@ -0,0 +1,126 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile +import nl.lumc.sasc.biopet.core.extensions.Md5sum +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import org.broadinstitute.gatk.queue.QScript +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test +import SummaryQScriptTest._ + +/** + * Created by pjvanthof on 14/01/16. 
+ */ +class SummaryQScriptTest extends TestNGSuite with Matchers { + @Test + def testNoJobs: Unit = { + SummaryQScript.md5sumCache.clear() + val script = makeQscript() + script.addSummaryJobs() + SummaryQScript.md5sumCache shouldBe empty + } + + @Test + def testFiles: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript(files = Map("file" -> file)) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testDuplicateFiles: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript(files = Map("file" -> file, "file2" -> file)) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testAddSummarizable: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript() + script.addSummarizable(makeSummarizable(files = Map("file" -> file, "file2" -> file)), "test") + script.summarizables.size shouldBe 1 + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + 
.exists(_.cmdLine.contains(" || "))) + } + + @Test + def testInputFile: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript() + script.addSummarizable(makeSummarizable(files = Map("file" -> file, "file2" -> file)), "test") + script.summarizables.size shouldBe 1 + script.inputFiles :+= InputFile(file, Some("md5sum")) + script.inputFiles :+= InputFile(file, None) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 3 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testAddQscript: Unit = { + SummaryQScript.md5sumCache.clear() + val script = makeQscript() + script.addSummaryQScript(script) + script.summaryQScripts.head shouldBe script + } +} + +object SummaryQScriptTest { + def makeQscript(settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map()) = + new SummaryQScript with QScript { + outputDir = new File(".") + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = ??? + def biopetScript(): Unit = ??? 
+ def root: Configurable = null + } + + def makeSummarizable(files: Map[String, File] = Map(), stats: Map[String, Any] = Map()) = new Summarizable { + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + } +} \ No newline at end of file diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..4bb196c162ddb93c85b19d1364d34789fb77e8b9 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala @@ -0,0 +1,346 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.{ PrintWriter, File } + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.core._ +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import nl.lumc.sasc.biopet.utils.summary.Summary +import org.broadinstitute.gatk.queue.function.CommandLineFunction +import org.broadinstitute.gatk.queue.{ QScript, QSettings } +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import WriteSummaryTest._ +import org.testng.annotations.Test + +import scala.util.matching.Regex + +/** + * Created by pjvanthof on 15/01/16. 
+ */ +class WriteSummaryTest extends TestNGSuite with Matchers { + + @Test + def testWrongRoot(): Unit = { + intercept[IllegalArgumentException] { + makeWriter(null) + } + } + + /** This is a basic summary test, no matter the content this should always be true */ + def basicSummaryTest(summary: Summary, + name: String, + sampleId: Option[String] = None, + libId: Option[String] = None): Unit = { + summary.getValue(sampleId, libId, name) should not be None + summary.getValue(sampleId, libId, name, "files", "pipeline").get shouldBe a[Map[_, _]] + summary.getValue(sampleId, libId, name, "settings").get shouldBe a[Map[_, _]] + summary.getValue(sampleId, libId, name, "executables").get shouldBe a[Map[_, _]] + + summary.getValue("meta") should not be None + summary.getValue("meta", "pipeline_name") shouldBe Some(name) + summary.getValue("meta", "last_commit_hash") shouldBe Some(nl.lumc.sasc.biopet.LastCommitHash) + summary.getValue("meta", "pipeline_version") shouldBe Some(nl.lumc.sasc.biopet.Version) + summary.getValue("meta", "output_dir") shouldBe Some(new File(".").getAbsolutePath) + summary.getValue("meta", "summary_creation") should not be None + } + + def createFakeCheckSum(file: File): Unit = { + file.getParentFile.mkdirs() + val writer = new PrintWriter(file) + writer.println("checksum file") + writer.close() + file.deleteOnExit() + } + + @Test + def testEmpty(): Unit = { + val qscript = makeQscript(name = "test") + val writer = makeWriter(qscript) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + } + + @Test + def testMergeQscript(): Unit = { + val qscript = makeQscript(name = "test") + val qscript2 = makeQscript(name = "test2") + qscript.addSummaryQScript(qscript2) + val summaryWriter = new PrintWriter(qscript2.summaryFile) + summaryWriter.println("""{ "test2": "value" }""") + summaryWriter.close() + val writer = makeWriter(qscript) + 
writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test2") shouldBe Some("value") + } + + @Test + def testSingleJob(): Unit = { + val qscript = makeQscript("test") + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testSingleJavaJob(): Unit = { + val qscript = makeQscript("test") + val writer = makeWriter(qscript) + val summarizable = makeJavaCommand(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.add(summarizable) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + summary.getValue("test", "executables", "java_command", "version") shouldBe Some("test version") + } + + @Test + def testVersion(): Unit = { + val qscript = makeQscript("test") + val writer = makeWriter(qscript) + val summarizable = makeVersionSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.add(summarizable) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + 
createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + summary.getValue("test", "executables", "version_command", "version") shouldBe Some("test version") + } + + @Test + def testSampleLibrary(): Unit = { + val qscript = makeSampleLibraryQscript("test", s = Some("sampleName"), l = Some("libName")) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test", sampleId = Some("sampleName"), libId = Some("libName")) + summary.getValue(Some("sampleName"), Some("libName"), "test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue(Some("sampleName"), Some("libName"), "test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testSample(): Unit = { + val qscript = makeSampleLibraryQscript("test", s = Some("sampleName")) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test", sampleId = Some("sampleName"), libId = None) + summary.getValue(Some("sampleName"), None, "test", "stats", 
"tool_1", "key") shouldBe Some("value") + summary.getValue(Some("sampleName"), None, "test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testMultisampleQscript(): Unit = { + val qscript = makeMultisampleQscript("test", multisampleConfig) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + + summary.getValue(Some("sampleName"), Some("libName"), "test") should not be None + } + +} + +object WriteSummaryTest { + def makeWriter(root: Configurable, c: Map[String, Any] = Map()) = new WriteSummary(root) { + override def globalConfig = new Config(c) + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + + def makeQscript(name: String, + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map()) = + new SummaryQScript with QScript { + summaryName = name + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + } + + def 
makeSampleLibraryQscript(name: String, + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map(), + s: Option[String] = None, + l: Option[String] = None) = + new SummaryQScript with QScript with SampleLibraryTag { + sampleId = s + libId = l + summaryName = name // use the caller-supplied name, as makeQscript does + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + } + + def makeMultisampleQscript(name: String, + c: Map[String, Any], + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map()) = + new MultiSampleQScript with QScript { + summaryName = name // use the caller-supplied name, as makeQscript does + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + + class Sample(id: String) extends AbstractSample(id) { + class Library(id: String) extends AbstractLibrary(id) { + protected def addJobs(): Unit = {} + def summaryFiles: Map[String, File] = files + def summaryStats: Any = Map() + } + + def makeLibrary(id: String): Library = new Library(id) + protected def addJobs(): Unit = {} + def summaryFiles: Map[String, File] = files + def summaryStats: Any = Map() + } + + def makeSample(id: String): Sample = new Sample(id) + + def addMultiSampleJobs(): Unit = {} + } + + val multisampleConfig = Map("samples" -> Map("sampleName" -> Map("libraries" -> Map("libName" -> Map())))) + + def makeSummarizable(files: Map[String, File] = Map(), stats: 
Map[String, Any] = Map()) = new Summarizable { + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + } + + def makeJavaCommand(files: Map[String, File] = Map(), + stats: Map[String, Any] = Map(), + c: Map[String, Any] = Map()) = new BiopetJavaCommandLineFunction with Summarizable with Version { + override def globalConfig = new Config(c) + override def configName = "java_command" + def root: Configurable = null + def summaryStats: Map[String, Any] = stats + def summaryFiles: Map[String, File] = files + + def versionCommand: String = "echo test version" + def versionRegex: Regex = """(.*)""".r + override def getVersion = Some("test version") + + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + + def makeVersionSummarizable(files: Map[String, File] = Map(), + stats: Map[String, Any] = Map(), + c: Map[String, Any] = Map()) = + new CommandLineFunction with Configurable with Summarizable with Version { + override def globalConfig = new Config(c) + override def configName = "version_command" + def root: Configurable = null + + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + + def versionCommand: String = "echo test version" + def versionRegex: Regex = """(.*)""".r + override def getVersion = Some("test version") + + def commandLine: String = "" + + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index 
6e06894f916a5206bcac748d924eb8f3a9f51c53..80743e00d167dd7a37ae9ec5d208cf0a83c7be97 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -26,18 +26,19 @@ import scala.collection.mutable import scala.io.Source /** - * Extension for cutadept - * Based on version 1.5 + * Extension for cutadapt + * Started with version 1.5 + * Updated to version 1.9 (18-01-2016 by wyleung) */ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable with Version { @Input(doc = "Input fastq file") - var fastq_input: File = _ + var fastqInput: File = _ @Output - var fastq_output: File = _ + var fastqOutput: File = _ @Output(doc = "Output statistics file") - var stats_output: File = _ + var statsOutput: File = _ executable = config("exe", default = "cutadapt") def versionCommand = executable + " --version" @@ -46,28 +47,121 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su /** Name of the key containing clipped adapters information in the summary stats. 
*/ def adaptersStatsName = "adapters" - var default_clip_mode: String = config("default_clip_mode", default = "3") - var opt_adapter: Set[String] = config("adapter", default = Nil) - var opt_anywhere: Set[String] = config("anywhere", default = Nil) - var opt_front: Set[String] = config("front", default = Nil) - - var opt_discard: Boolean = config("discard", default = false) - var opt_minimum_length: Int = config("minimum_length", 1) - var opt_maximum_length: Option[Int] = config("maximum_length") + var defaultClipMode: String = config("default_clip_mode", default = "3") + var adapter: Set[String] = config("adapter", default = Nil) + var anywhere: Set[String] = config("anywhere", default = Nil) + var front: Set[String] = config("front", default = Nil) + + var errorRate: Option[Double] = config("error_rate") + var noIndels: Boolean = config("no_indels", default = false) + var times: Option[Int] = config("times") + var overlap: Option[Int] = config("overlap") + var matchReadWildcards: Boolean = config("match_read_wildcards", default = false) + var noMatchAdapterWildcards: Boolean = config("no_match_adapter_wildcards", default = false) // specific for 1.9 + + /** Options for filtering of processed reads */ + var discard: Boolean = config("discard", default = false) + var trimmedOnly: Boolean = config("trimmed_only", default = false) + var minimumLength: Int = config("minimum_length", 1) + var maximumLength: Option[Int] = config("maximum_length") + var noTrim: Boolean = config("no_trim", default = false) + var maxN: Option[Int] = config("max_n") // specific for 1.9 + var maskAdapter: Boolean = config("mask_adapter", default = false) + + /** Options that influence what gets output to where */ + var quiet: Boolean = config("quiet", default = false) + // var output: File // see up @Output + var infoFile: Option[File] = config("info_file") + var restFile: Option[File] = config("rest_file") + var wildcardFile: Option[File] = config("wildcard_file") + var tooShortOutput: 
Option[File] = config("too_short_output") + var tooLongOutput: Option[File] = config("too_long_output") + var untrimmedOutput: Option[File] = config("untrimmed_output") + + /** Additional read modifications */ + var cut: Option[Int] = config("cut") + var qualityCutoff: Option[String] = config("quality_cutoff") + var qualityBase: Option[Int] = config("quality_base") + var trimN: Boolean = config("trim_n", default = false) + var prefix: Option[String] = config("prefix") + var suffix: Option[String] = config("suffix") + var stripSuffix: Set[String] = config("strip_suffix", default = Nil) // set-valued: one --strip-suffix flag is emitted per entry + var lengthTag: Option[String] = config("length_tag") + + /** Colorspace options */ + var colorspace: Boolean = config("colorspace", default = false) + var doubleEncode: Boolean = config("double_encode", default = false) + var trimPrimer: Boolean = config("trim_primer", default = false) + var stripF3: Boolean = config("strip_f3", default = false) + var maq: Boolean = config("maq", default = false) + var bwa: Boolean = config("bwa", default = false, freeVar = false) + var noZeroCap: Boolean = config("no_zero_cap", default = false) + var zeroCap: Boolean = config("zero_cap", default = false) + + /** Paired end options */ + var peAdapter: Set[String] = config("pe_adapter", default = Nil) + var peAdapterFront: Set[String] = config("pe_adapter_front", default = Nil) + var peAdapterBoth: Set[String] = config("pe_adapter_both", default = Nil) + var peCut: Boolean = config("pe_cut", default = false) + var pairedOutput: Option[File] = config("paired_output") + var interleaved: Boolean = config("interleaved", default = false) + var untrimmedPairedOutput: Option[File] = config("untrimmed_paired_output") /** return commandline to execute */ def cmdLine = required(executable) + - // options - repeat("-a", opt_adapter) + - repeat("-b", opt_anywhere) + - repeat("-g", opt_front) + - conditional(opt_discard, "--discard") + - optional("-m", opt_minimum_length) + - optional("-M", opt_maximum_length) + + // Options 
that influence how the adapters are found + repeat("-a", adapter) + + repeat("-b", anywhere) + + repeat("-g", front) + + optional("--error-rate", errorRate) + + conditional(noIndels, "--no-indels") + + optional("--times", times) + + optional("--overlap", overlap) + + conditional(matchReadWildcards, "--match-read-wildcards") + + conditional(noMatchAdapterWildcards, "--no-match-adapter-wildcards") + + // Options for filtering of processed reads + conditional(discard, "--discard") + + conditional(trimmedOnly, "--trimmed-only") + + optional("-m", minimumLength) + + optional("-M", maximumLength) + + conditional(noTrim, "--no-trim") + + optional("--max-n", maxN) + + conditional(maskAdapter, "--mask-adapter") + + conditional(quiet, "--quiet") + + optional("--info-file", infoFile) + + optional("--rest-file", restFile) + + optional("--wildcard-file", wildcardFile) + + optional("--too-short-output", tooShortOutput) + + optional("--too-long-output", tooLongOutput) + + optional("--untrimmed-output", untrimmedOutput) + + // Additional read modifications + optional("--cut", cut) + + optional("--quality-cutoff", qualityCutoff) + + conditional(trimN, "--trim-n") + + optional("--prefix", prefix) + + optional("--suffix", suffix) + + repeat("--strip-suffix", stripSuffix) + + optional("--length-tag", lengthTag) + + // Colorspace options + conditional(colorspace, "--colorspace") + + conditional(doubleEncode, "--double-encode") + + conditional(trimPrimer, "--trim-primer") + + conditional(stripF3, "--strip-f3") + + conditional(maq, "--maq") + + conditional(bwa, "--bwa") + + conditional(noZeroCap, "--no-zero-cap") + + conditional(zeroCap, "--zero-cap") + + // Paired-end options + repeat("-A", peAdapter) + + repeat("-G", peAdapterFront) + + repeat("-B", peAdapterBoth) + + conditional(interleaved, "--interleaved") + + optional("--paired-output", pairedOutput) + + optional("--untrimmed-paired-output", untrimmedPairedOutput) + // input / output - required(fastq_input) + - (if 
(outputAsStsout) "" else required("--output", fastq_output) + - " > " + required(stats_output)) + required(fastqInput) + + (if (outputAsStsout) "" else required("--output", fastqOutput) + + " > " + required(statsOutput)) /** Output summary stats */ def summaryStats: Map[String, Any] = { @@ -79,7 +173,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) val adapter_stats: mutable.Map[String, Int] = mutable.Map() - if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines()) { + if (statsOutput.exists) for (line <- Source.fromFile(statsOutput).getLines()) { line match { case trimR(m) => stats += ("trimmed" -> m.toInt) case tooShortR(m) => stats += ("tooshort" -> m.toInt) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala index 0379c36d9ace680b7833bf226912504bb619f8e2..4fb0a05fd7e04617c15a68c63763d7e60da322d4 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala @@ -39,6 +39,8 @@ class Mpileup2cns(val root: Configurable) extends Varscan { var vcfSampleList: Option[File] = config("vcf_sample_list") var variants: Option[Int] = config("variants") + override def defaultCoreMemory = 6.0 + override def beforeGraph(): Unit = { val validValues: Set[Int] = Set(0, 1) // check for boolean vars that are passed as ints diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala index f3d8585de87216b872bee7235e364df29514710a..76da0423cea32f599c978e852ca0aa35a740137e 100644 --- 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala @@ -15,20 +15,15 @@ */ package nl.lumc.sasc.biopet.extensions.varscan -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction } -abstract class Varscan extends BiopetJavaCommandLineFunction { +abstract class Varscan extends BiopetJavaCommandLineFunction with Version { override def subPath = "varscan" :: super.subPath jarFile = config("varscan_jar") - /** - * TODO: test version - * override def versionCommand = super.commandLine - * override val versionRegex = """VarScan v(.*)""".r - */ - - override def defaultCoreMemory = 5.0 + def versionCommand = super.commandLine + def versionRegex = """VarScan v(.*)""".r } diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index f13230534af0a61fa29f68daa8d48c81a55e1f23..d0c185cf1a837c816404c2c160bc8f6428e1cc19 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -25,7 +25,8 @@ object BamUtils { val inputSam = SamReaderFactory.makeDefault.open(file) val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct if (samples.size == 1) samples.head -> file - else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + else if (samples.size > 1) throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + else throw new IllegalArgumentException("Bam does not contain sample ID or have no readgroups defined: " + file) } if (temp.map(_._1).distinct.size != temp.size) throw new IllegalArgumentException("Samples has been found twice") temp.toMap diff --git 
a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala index d01755e07300cf09f874bc36f07d640d6567c84e..9d4b9dc2bf84bcda084ce5de9647620e1a130658 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala @@ -36,7 +36,7 @@ trait Logging { object Logging { val logger = Logger.getRootLogger - private val errors: ListBuffer[Exception] = ListBuffer() + private[biopet] val errors: ListBuffer[Exception] = ListBuffer() def addError(error: String, debug: String = null): Unit = { val msg = error + (if (debug != null && logger.isDebugEnabled) "; " + debug else "") diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index b34b3772296f9de419f7a249d45daefc513c7259..f974f8c9a43f685390ee0c510ffa0064d07167b5 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -46,16 +46,16 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter // sequences come from FastQC case _ => - throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastq_input'.") + throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.") } // FastQC found no adapters case otherwise => ; - logger.debug(s"No adapters found for summarizing in '$fastq_input'.") + logger.debug(s"No adapters found for summarizing in '$fastqInput'.") None } // "adapters" key not found ~ something went wrong in our part - case _ => throw new 
RuntimeException(s"Required key 'adapters' not found in stats entry '$fastq_input'.") + case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.") } initStats.updated(adaptersStatsName, adapterCounts) } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index 22a9a4a526a0a8d0c640b9262baef99063f07288..390d68b58e5335305e836effa745bc36c9c5b8fa 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -102,12 +102,12 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val foundAdapters = fastqc.foundAdapters.map(_.seq) if (foundAdapters.nonEmpty) { val cutadapt = new Cutadapt(root, fastqc) - cutadapt.fastq_input = seqtk.output - cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq") - cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") - if (cutadapt.default_clip_mode == "3") cutadapt.opt_adapter ++= foundAdapters - else if (cutadapt.default_clip_mode == "5") cutadapt.opt_front ++= foundAdapters - else if (cutadapt.default_clip_mode == "both") cutadapt.opt_anywhere ++= foundAdapters + cutadapt.fastqInput = seqtk.output + cutadapt.fastqOutput = new File(output.getParentFile, input.getName + ".cutadapt.fq") + cutadapt.statsOutput = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") + if (cutadapt.defaultClipMode == "3") cutadapt.adapter ++= foundAdapters + else if (cutadapt.defaultClipMode == "5") cutadapt.front ++= foundAdapters + else if (cutadapt.defaultClipMode == "both") cutadapt.anywhere ++= foundAdapters 
addPipeJob(cutadapt) Some(cutadapt) } else None @@ -117,7 +117,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val sickle = new Sickle(root) sickle.output_stats = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.trim.stats") sickle.input_R1 = clip match { - case Some(c) => c.fastq_output + case Some(c) => c.fastqOutput case _ => seqtk.output } sickle.output_R1 = new File(output.getParentFile, input.getName + ".sickle.fq") @@ -127,7 +127,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val outputFile = (clip, trim) match { case (_, Some(t)) => t.output_R1 - case (Some(c), _) => c.fastq_output + case (Some(c), _) => c.fastqOutput case _ => seqtk.output } diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 559c15ff35fbf1fdc9649db8cb3ed4ac166d9559..a9be0fc1ce44a6fd445cd12156549a42cd52948e 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -107,7 +107,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ) /** File to add to the summary */ - def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1, + def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> input_R1, "reference" -> referenceFasta()) ++ (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map()) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala index 30e4e3e3433838cc0e07af1e54c91dca0e796b8f..2ace6f962b245acca0e65212ea6ee11bb478ebf0 100644 
--- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala @@ -66,7 +66,7 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder { "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - Map("pipelineName" -> pipelineName, "fileTag" -> "preProcessBam"))), Map()) + Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map()) /** Single sample page */ def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala index 97a175de006fe90f29321f0ce92f51a5533dfb8e..38195664d78afab8924fbe96f4742c47cbe80af6 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala @@ -63,7 +63,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript class Library(libId: String) extends AbstractLibrary(libId) { def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) :: inputBam.map("input_bam" -> _) :: bamFile.map("output_bam" -> _) :: - preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap + preProcessBam.map("output_bam_preprocess" -> _) :: Nil).flatten.toMap def summaryStats: Map[String, Any] = Map() @@ -162,7 +162,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript } def 
summaryFiles: Map[String, File] = (bamFile.map("output_bam" -> _) :: - preProcessBam.map("output_preProcessBam" -> _) :: Nil).flatten.toMap + preProcessBam.map("output_bam_preprocess" -> _) :: Nil).flatten.toMap def summaryStats: Map[String, Any] = Map()