diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala index 9395daa1da479d97d5b5bd822379be5085b67c6c..e47797cc7482900cd9084774320ef174eb79e6e2 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaTest.scala @@ -93,6 +93,12 @@ class ShivaTest extends TestNGSuite with Matchers { object ShivaTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): String = { + val file = new File(outputDir, "input" + File.separator + name) + Files.touch(file) + file.getAbsolutePath + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) @@ -136,8 +142,8 @@ object ShivaTest { val sample1 = Map( "samples" -> Map("sample1" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "1_1_R1.fq", - "R2" -> "1_1_R2.fq" + "R1" -> inputTouch("1_1_R1.fq"), + "R2" -> inputTouch("1_1_R2.fq") ) ) ))) @@ -145,8 +151,8 @@ object ShivaTest { val sample2 = Map( "samples" -> Map("sample2" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "2_1_R1.fq", - "R2" -> "2_1_R2.fq" + "R1" -> inputTouch("2_1_R1.fq"), + "R2" -> inputTouch("2_1_R2.fq") ) ) ))) @@ -154,12 +160,12 @@ object ShivaTest { val sample3 = Map( "samples" -> Map("sample3" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "3_1_R1.fq", - "R2" -> "3_1_R2.fq" + "R1" -> inputTouch("3_1_R1.fq"), + "R2" -> inputTouch("3_1_R2.fq") ), "lib2" -> Map( - "R1" -> "3_2_R1.fq", - "R2" -> "3_2_R2.fq" + "R1" -> inputTouch("3_2_R1.fq"), + "R2" -> inputTouch("3_2_R2.fq") ) ) ))) diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index 2c60207a82fae9da823eb02d7a860b5dd8be1834..8e8a1eddf9e5d6b5aab36c4c6fea7d35ea2da82e 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -73,7 +73,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield new File("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && @@ -107,6 +107,12 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { object ShivaVariantcallingTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): File = { + val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile + Files.touch(file) + file + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala index 451350227e5620ae612212eb341e0acae49ad958..fb9e39611fd861f9158503581f17d4bd12e6cb30 100644 --- a/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala +++ b/public/bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala @@ -35,6 +35,7 @@ class Bam2Wig(val root: Configurable) extends QScript with BiopetQScript { var bamFile: File = null def init(): Unit = { + inputFiles :+= new InputFile(bamFile) } def biopetScript(): Unit = { diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index defb8361c68974ccffdddcba26dbe7700d58fea2..f6a6dc090defdb238f76b68f21189faf38fd207c 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -71,7 +71,8 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit } /** executed before script */ - def init() { + def init(): Unit = { + inputFiles :+= new InputFile(inputBam) } /** Script to add jobs */ diff --git a/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala b/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala index 7e97b4a78e1176b1603a7ae5aec10057c4bd0175..33304cb198807f46d3801ddfc9b05eaa7d555fe4 100644 --- a/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala +++ b/public/bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala @@ -69,7 +69,7 @@ class BamMetricsTest extends TestNGSuite with Matchers { Map("regions_of_interest" -> (1 to rois).map("roi_" + _ + ".bed").toList) val bammetrics: BamMetrics = initPipeline(map) - bammetrics.inputBam = new File("input.bam") + bammetrics.inputBam = BamMetricsTest.bam bammetrics.sampleId = Some("1") bammetrics.libId = Some("1") bammetrics.script() @@ -98,6 +98,10 @@ class BamMetricsTest extends TestNGSuite with Matchers { object BamMetricsTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + + val bam = new File(outputDir, "input" + File.separator + "bla.bam") + Files.touch(bam) private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala index 1990085cef63fc60c637e04bb17811a20b837f85..a2a1f832843072d5abc9d46fb0280ba3b2c19ea4 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala @@ -89,6 +89,8 @@ trait BastyTrait extends MultiSampleQScript { addAll(shiva.functions) addSummaryQScript(shiva) + inputFiles :::= shiva.inputFiles + addSamplesJobs() } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index c84c9faa84c187efbc2f20d0e6599784b7f2db26..fa714c9423707456f1f6d1cfa6f2617c44318218 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -48,6 +48,10 @@ trait BiopetQScript extends Configurable with GatkLogging { var outputFiles: Map[String, File] = Map() + type InputFile = BiopetQScript.InputFile + + var inputFiles: List[InputFile] = Nil + /** Get implemented from org.broadinstitute.gatk.queue.QScript */ var qSettings: QSettings @@ -86,6 +90,11 @@ trait BiopetQScript extends Configurable with GatkLogging { globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName)) else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created") + inputFiles.foreach { i => + if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}") + else if (!i.file.canRead()) Logging.addError(s"Input file can not be read: ${i.file}") + } + this match { case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) => logger.info("Write report is skipped because sample flag is used") @@ -107,3 +116,7 @@ trait BiopetQScript extends Configurable with GatkLogging { add(function) } } + +object BiopetQScript { + protected case class InputFile(file: File, md5: Option[String] = None) +} \ No newline at end of file diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala new file mode 100644 index 0000000000000000000000000000000000000000..0ae2587f7928bb8d8cfe3e157f79fec7afff031a --- /dev/null +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/CheckChecksum.scala @@ -0,0 +1,40 @@ +package nl.lumc.sasc.biopet.core.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.summary.WriteSummary +import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * This class checks md5sums and give an exit code 1 when md5sum is not the same + * + * Created by pjvanthof on 16/08/15. + */ +class CheckChecksum extends InProcessFunction { + @Input(required = true) + var inputFile: File = _ + + @Input(required = true) + var checksumFile: File = _ + + @Argument(required = true) + var checksum: String = _ + + override def freezeFieldValues(): Unit = { + super.freezeFieldValues() + jobOutputFile = new File(checksumFile.getParentFile, checksumFile.getName + ".check.out") + } + + /** Exits whenever the input md5sum is not the same as the output md5sum */ + def run: Unit = { + val outputChecksum = WriteSummary.parseChecksum(checksumFile).toLowerCase + + if (outputChecksum != checksum.toLowerCase) { + logger.error(s"Input file: '$inputFile' md5sum is not as expected, aborting pipeline") + + // 130 Simulates a ctr-C + Runtime.getRuntime.halt(130) + } + } +} \ No newline at end of file diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala index 7f0fce673b9b9f053c871ef4c2ba283f9dea36cb..6eaae5391f4712b427cc0307d7a480d1aee1e712 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala @@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.core.summary import java.io.File import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.core.extensions.Md5sum +import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum } import scala.collection.mutable @@ -27,7 +27,7 @@ import scala.collection.mutable * * Created by pjvan_thof on 2/14/15. */ -trait SummaryQScript extends BiopetQScript { +trait SummaryQScript extends BiopetQScript { qscript => /** Key is sample/library, None is sample or library is not applicable */ private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map() @@ -116,6 +116,20 @@ trait SummaryQScript extends BiopetQScript { //TODO: add more checksums types } + for (inputFile <- inputFiles) { + inputFile.md5 match { + case Some(checksum) => { + val checkMd5 = new CheckChecksum + checkMd5.inputFile = inputFile.file + require(SummaryQScript.md5sumCache.contains(inputFile.file), "Md5 job is not executed, checksum file can't be found") + checkMd5.checksumFile = SummaryQScript.md5sumCache(inputFile.file) + checkMd5.checksum = checksum + add(checkMd5) + } + case _ => + } + } + for ((_, summarizableList) <- summarizables; summarizable <- summarizableList) { summarizable match { case f: BiopetCommandLineFunctionTrait => f.beforeGraph() diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 3db4607d6288f84b204033ee3db186ee76771935..3530e4d0ad05f1916ccf344e625bbe5b097c0f65 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -153,10 +153,11 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config def parseFile(file: File): Map[String, Any] = { val map: mutable.Map[String, Any] = mutable.Map() map += "path" -> file.getAbsolutePath - if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file)) + if (md5sum) map += "md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)) map.toMap } - +} +object WriteSummary { /** Retrive checksum from file */ def parseChecksum(checksumFile: File): String = { Source.fromFile(checksumFile).getLines().toList.head.split(" ")(0) diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala index 5a3e1e3f26588c103a4bf5e0a190aa223d064450..e5ba10717a5e32b3baf4dbe059d64dccfdcdcc61 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala @@ -57,7 +57,7 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFuntion with Refer } override def beforeCmd(): Unit = { - if (sample == null && inputBam.exists()) { + if (sample == null && inputBam.exists() && inputBam.length() > 0) { val inputSam = SamReaderFactory.makeDefault.open(inputBam) val readGroups = inputSam.getFileHeader.getReadGroups val samples = readGroups.map(readGroup => readGroup.getSample).distinct diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index 195718d5de9f0b708c2bcbc64faaf927109c176f..f8b4dc6db0ff771d046ffe320a24a7c0d294b832 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -78,6 +78,10 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with if (config.contains("R1")) { mapping.input_R1 = config("R1") if (config.contains("R2")) mapping.input_R2 = config("R2") + + inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) + mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) + mapping.init() mapping.biopetScript() addAll(mapping.functions) diff --git a/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala b/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala index c361908fa1110ef7133639e91912acab5cce0da9..407b6fb415a4dbce63a80228241b7a81df29cf36 100644 --- a/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala +++ b/public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala @@ -97,6 +97,12 @@ class CarpTest extends TestNGSuite with Matchers { object CarpTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): String = { + val file = new File(outputDir, "input" + File.separator + name) + Files.touch(file) + file.getAbsolutePath + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) @@ -127,8 +133,8 @@ object CarpTest { val sample1 = Map( "samples" -> Map("sample1" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "1_1_R1.fq", - "R2" -> "1_1_R2.fq" + "R1" -> inputTouch("1_1_R1.fq"), + "R2" -> inputTouch("1_1_R2.fq") ) ) ))) @@ -136,8 +142,8 @@ object CarpTest { val sample2 = Map( "samples" -> Map("sample2" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "2_1_R1.fq", - "R2" -> "2_1_R2.fq" + "R1" -> inputTouch("2_1_R1.fq"), + "R2" -> inputTouch("2_1_R2.fq") ) ) ))) @@ -145,12 +151,12 @@ object CarpTest { val sample3 = Map( "samples" -> Map("sample3" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "3_1_R1.fq", - "R2" -> "3_1_R2.fq" + "R1" -> inputTouch("3_1_R1.fq"), + "R2" -> inputTouch("3_1_R2.fq") ), "lib2" -> Map( - "R1" -> "3_2_R1.fq", - "R2" -> "3_2_R2.fq" + "R1" -> inputTouch("3_2_R1.fq"), + "R2" -> inputTouch("3_2_R2.fq") ) ) ))) @@ -158,8 +164,8 @@ object CarpTest { val threatment1 = Map( "samples" -> Map("threatment" -> Map("control" -> "control1", "libraries" -> Map( "lib1" -> Map( - "R1" -> "threatment_1_R1.fq", - "R2" -> "threatment_1_R2.fq" + "R1" -> inputTouch("threatment_1_R1.fq"), + "R2" -> inputTouch("threatment_1_R2.fq") ) ) ))) @@ -167,8 +173,8 @@ object CarpTest { val control1 = Map( "samples" -> Map("control1" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "control_1_R1.fq", - "R2" -> "control_1_R2.fq" + "R1" -> inputTouch("control_1_R1.fq"), + "R2" -> inputTouch("control_1_R2.fq") ) ) ))) diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 07c356b7a894d5797002751d264a4b24e3ba5e7e..7ae2730c2bd92c3c06e10ecb2452cc3d1e823933 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -85,6 +85,9 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with paired = input_R2.isDefined + inputFiles :+= new InputFile(input_R1) + input_R2.foreach(inputFiles :+= new InputFile(_)) + if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz")) else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip")) else R1_name = input_R1.getName diff --git a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala index 2cb3f0e7b36175d9bf7633d7d639e6b03612976f..7b6716024963be03f256eb31bfad2bf5bcdf769e 100644 --- a/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala +++ b/public/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala @@ -67,8 +67,8 @@ class FlexiprepTest extends TestNGSuite with Matchers { ), Map(FlexiprepTest.executables.toSeq: _*)) val flexiprep: Flexiprep = initPipeline(map) - flexiprep.input_R1 = new File(flexiprep.outputDir, "bla_R1.fq" + (if (zipped) ".gz" else "")) - if (paired) flexiprep.input_R2 = Some(new File(flexiprep.outputDir, "bla_R2.fq" + (if (zipped) ".gz" else ""))) + flexiprep.input_R1 = (if (zipped) FlexiprepTest.r1Zipped else FlexiprepTest.r1) + if (paired) flexiprep.input_R2 = Some((if (zipped) FlexiprepTest.r2Zipped else FlexiprepTest.r2)) flexiprep.sampleId = Some("1") flexiprep.libId = Some("1") flexiprep.script() @@ -95,6 +95,16 @@ class FlexiprepTest extends TestNGSuite with Matchers { object FlexiprepTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + + val r1 = new File(outputDir, "input" + File.separator + "R1.fq") + Files.touch(r1) + val r2 = new File(outputDir, "input" + File.separator + "R2.fq") + Files.touch(r2) + val r1Zipped = new File(outputDir, "input" + File.separator + "R1.fq.gz") + Files.touch(r1Zipped) + val r2Zipped = new File(outputDir, "input" + File.separator + "R2.fq.gz") + Files.touch(r2Zipped) val executables = Map( "seqstat" -> Map("exe" -> "test"), diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index c500757088d3c313190cb4a9b0eca65bfc48f9a4..d033594c6e42663427108baf12038b7eed6fb3f6 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -844,6 +844,8 @@ class Gentrap(val root: Configurable) extends QScript def addJobs(): Unit = { // create per-library alignment file addAll(mappingJob.functions) + // Input file checking + inputFiles :::= mappingJob.inputFiles // add bigwig track addAll(bam2wigModule.functions) qscript.addSummaryQScript(mappingJob) diff --git a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala index 140db66fd62dd850365b27137cc3df0891bd38a7..2b2ddaf259e8fa5ad382ef1aca1337826ecf0ad8 100644 --- a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala +++ b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala @@ -43,8 +43,8 @@ class GentrapTest extends TestNGSuite with Matchers { /** Convenience method for making library config */ private def makeLibConfig(idx: Int, paired: Boolean = true) = { - val files = Map("R1" -> "test_R1.fq") - if (paired) (s"lib_$idx", files ++ Map("R2" -> "test_R2.fq")) + val files = Map("R1" -> GentrapTest.inputTouch("test_R1.fq")) + if (paired) (s"lib_$idx", files ++ Map("R2" -> GentrapTest.inputTouch("test_R2.fq"))) else (s"lib_$idx", files) } @@ -179,6 +179,12 @@ class GentrapTest extends TestNGSuite with Matchers { object GentrapTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): String = { + val file = new File(outputDir, "input" + File.separator + name) + Files.touch(file) + file.getAbsolutePath + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 9d508b12ce727acb0bdfdf52fc485b8e353e5dae..3c886ee3a7c04544953e02a29d960a9436559bcb 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -137,6 +137,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S require(sampleId.isDefined, "Missing sample ID on mapping module") require(libId.isDefined, "Missing library ID on mapping module") + inputFiles :+= new InputFile(input_R1) + input_R2.foreach(inputFiles :+= new InputFile(_)) + paired = input_R2.isDefined if (readgroupId == null) readgroupId = sampleId.get + "-" + libId.get diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala index 849bffcb6998f31e8d527af465de68775b6e11a4..3c28181101a75eafa1518e441fb457a5ae197099 100644 --- a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala +++ b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala @@ -79,11 +79,11 @@ class MappingTest extends TestNGSuite with Matchers { val mapping: Mapping = initPipeline(map) if (zipped) { - mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq.gz") - if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq.gz")) + mapping.input_R1 = MappingTest.r1Zipped + if (paired) mapping.input_R2 = Some(MappingTest.r2Zipped) } else { - mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq") - if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq")) + mapping.input_R1 = MappingTest.r1 + if (paired) mapping.input_R2 = Some(MappingTest.r2) } mapping.sampleId = Some("1") mapping.libId = Some("1") @@ -131,6 +131,16 @@ class MappingTest extends TestNGSuite with Matchers { object MappingTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + + val r1 = new File(outputDir, "input" + File.separator + "R1.fq") + Files.touch(r1) + val r2 = new File(outputDir, "input" + File.separator + "R2.fq") + Files.touch(r2) + val r1Zipped = new File(outputDir, "input" + File.separator + "R1.fq.gz") + Files.touch(r1Zipped) + val r2Zipped = new File(outputDir, "input" + File.separator + "R2.fq.gz") + Files.touch(r2Zipped) private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index 4aaee19c295de32dbc306a94cf6fecf63d1ba754..2ee8c3b1930aff553234f6f3860d7c13e0818783 100644 --- a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -88,6 +88,8 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { mapping.sampleId = Some(sampleId) protected def addJobs(): Unit = { + inputFiles :+= new InputFile(inputFastq, config("R1_md5")) + flexiprep.outputDir = new File(libDir, "flexiprep/") flexiprep.input_R1 = inputFastq flexiprep.init() diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index 3f65bf2a20864e2017f512981daa0d37aca52772..dafb2e1ccc5a72807aa37cc6858711afbfe78c23 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -136,51 +136,56 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { case (true, _) => mapping.foreach(mapping => { mapping.input_R1 = config("R1") mapping.input_R2 = config("R2") + inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) + mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) }) - case (false, true) => config("bam_to_fastq", default = false).asBoolean match { - case true => - val samToFastq = SamToFastq(qscript, config("bam"), - new File(libDir, sampleId + "-" + libId + ".R1.fastq"), - new File(libDir, sampleId + "-" + libId + ".R2.fastq")) - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.foreach(mapping => { - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - }) - case false => - val inputSam = SamReaderFactory.makeDefault.open(config("bam")) - val readGroups = inputSam.getFileHeader.getReadGroups - - val readGroupOke = readGroups.forall(readGroup => { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - readGroup.getSample == sampleId && readGroup.getLibrary == libId - }) - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + config("bam")) - val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.isIntermediate = true - qscript.add(aorrg) - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } else { - val oldBamFile: File = config("bam") - val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, oldBamFile, bamFile.get) - bamLn.deps :+= baiLn.output - add(bamLn) - } + case (false, true) => { + inputFiles :+= new InputFile(config("bam"), config("bam_md5")) + config("bam_to_fastq", default = false).asBoolean match { + case true => + val samToFastq = SamToFastq(qscript, config("bam"), + new File(libDir, sampleId + "-" + libId + ".R1.fastq"), + new File(libDir, sampleId + "-" + libId + ".R2.fastq")) + samToFastq.isIntermediate = true + qscript.add(samToFastq) + mapping.foreach(mapping => { + mapping.input_R1 = samToFastq.fastqR1 + mapping.input_R2 = Some(samToFastq.fastqR2) + }) + case false => + val inputSam = SamReaderFactory.makeDefault.open(config("bam")) + val readGroups = inputSam.getFileHeader.getReadGroups + + val readGroupOke = readGroups.forall(readGroup => { + if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") + if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") + readGroup.getSample == sampleId && readGroup.getLibrary == libId + }) + inputSam.close() + + if (!readGroupOke) { + if (config("correct_readgroups", default = false).asBoolean) { + logger.info("Correcting readgroups, file:" + config("bam")) + val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get) + aorrg.RGID = sampleId + "-" + libId + aorrg.RGLB = libId + aorrg.RGSM = sampleId + aorrg.isIntermediate = true + qscript.add(aorrg) + } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + + "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") + } else { + val oldBamFile: File = config("bam") + val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai") + val baiLn = Ln(qscript, oldIndex, newIndex) + add(baiLn) + + val bamLn = Ln(qscript, oldBamFile, bamFile.get) + bamLn.deps :+= baiLn.output + add(bamLn) + } + } } case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", no reads found") } @@ -294,7 +299,7 @@ trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference { addAll(vc.functions) addSummaryQScript(vc) - if (config("annotation", default = true).asBoolean) { + if (config("annotation", default = false).asBoolean) { val toucan = new Toucan(this) toucan.outputDir = new File(outputDir, "annotation") toucan.inputVCF = vc.finalFile diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala index 56b6bf172b2d122b2ccd70818d3f9f92161e270d..cff186c29e8776f4457ceb6787ea75efc61a8885 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala @@ -88,6 +88,12 @@ class ShivaTest extends TestNGSuite with Matchers { object ShivaTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): String = { + val file = new File(outputDir, "input" + File.separator + name) + Files.touch(file) + file.getAbsolutePath + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) @@ -131,8 +137,8 @@ object ShivaTest { val sample1 = Map( "samples" -> Map("sample1" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "1_1_R1.fq", - "R2" -> "1_1_R2.fq" + "R1" -> inputTouch("1_1_R1.fq"), + "R2" -> inputTouch("1_1_R2.fq") ) ) ))) @@ -140,8 +146,8 @@ object ShivaTest { val sample2 = Map( "samples" -> Map("sample2" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "2_1_R1.fq", - "R2" -> "2_1_R2.fq" + "R1" -> inputTouch("2_1_R1.fq"), + "R2" -> inputTouch("2_1_R2.fq") ) ) ))) @@ -149,12 +155,12 @@ object ShivaTest { val sample3 = Map( "samples" -> Map("sample3" -> Map("libraries" -> Map( "lib1" -> Map( - "R1" -> "3_1_R1.fq", - "R2" -> "3_1_R2.fq" + "R1" -> inputTouch("3_1_R1.fq"), + "R2" -> inputTouch("3_1_R2.fq") ), "lib2" -> Map( - "R1" -> "3_2_R1.fq", - "R2" -> "3_2_R2.fq" + "R1" -> inputTouch("3_2_R1.fq"), + "R2" -> inputTouch("3_2_R2.fq") ) ) ))) diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index 0281d33f678804d7edd1620d204a0830d4f3201f..caeea0326d46e4b7673dae4a3d07f27c3cbd5f4e 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -61,7 +61,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield new File("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !freebayes) @@ -88,6 +88,12 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { object ShivaVariantcallingTest { val outputDir = Files.createTempDir() + new File(outputDir, "input").mkdirs() + def inputTouch(name: String): File = { + val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile + Files.touch(file) + file + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 606853f933f6cd1263a0f44193e5ae0e615812df..7c3cc327858be60bb3d299e550c0f4244b656560 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -35,6 +35,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum var inputVCF: File = _ def init(): Unit = { + inputFiles :+= new InputFile(inputVCF) } override def defaults = ConfigUtils.mergeMaps(Map(