From c3f100e55e3e2945178c8d0c7428626df371ce18 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sun, 16 Aug 2015 16:21:28 +0200 Subject: [PATCH] Adding md5sum checking --- .../lumc/sasc/biopet/core/BiopetQScript.scala | 2 +- .../biopet/core/summary/SummaryQScript.scala | 18 ++++++++-- .../biopet/core/summary/WriteSummary.scala | 5 +-- .../sasc/biopet/extensions/CheckMd5.scala | 33 +++++++++++++++++++ 4 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/CheckMd5.scala diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 8961f2550..a5aa3efdb 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -89,7 +89,7 @@ trait BiopetQScript extends Configurable with GatkLogging { globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName)) else BiopetQScript.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, outputdir can not be created") - inputFiles.foreach{ i => + inputFiles.foreach { i => if (!i.file.exists()) BiopetQScript.addError(s"Input file does not exist: ${i.file}") if (!i.file.canRead()) BiopetQScript.addError(s"Input file can not be read: ${i.file}") } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala index c59438133..844a664c4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala @@ -18,7 +18,7 @@ package 
nl.lumc.sasc.biopet.core.summary import java.io.File import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.extensions.Md5sum +import nl.lumc.sasc.biopet.extensions.{ CheckMd5, Md5sum } import scala.collection.mutable @@ -27,7 +27,7 @@ import scala.collection.mutable * * Created by pjvan_thof on 2/14/15. */ -trait SummaryQScript extends BiopetQScript { +trait SummaryQScript extends BiopetQScript { qscript => /** Key is sample/library, None is sample or library is not applicable */ private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map() @@ -116,6 +116,20 @@ trait SummaryQScript extends BiopetQScript { //TODO: add more checksums types } + for (inputFile <- inputFiles) { + inputFile.md5 match { + case Some(checksum) => { + val checkMd5 = new CheckMd5 + checkMd5.inputFile = inputFile.file + require(SummaryQScript.md5sumCache.contains(inputFile.file), "Md5 job is not executed, checksum file can't be found") + checkMd5.md5file = SummaryQScript.md5sumCache(inputFile.file) + checkMd5.md5sum = checksum + add(checkMd5) + } + case _ => + } + } + for ((_, summarizableList) <- summarizables; summarizable <- summarizableList) { summarizable match { case f: BiopetCommandLineFunctionTrait => f.beforeGraph() diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 0dad01e5b..d987dcef3 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -153,10 +153,11 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config def parseFile(file: File): Map[String, Any] = { val map: mutable.Map[String, Any] = mutable.Map() map += "path" -> file.getAbsolutePath - if (md5sum) map += "md5" -> 
parseChecksum(SummaryQScript.md5sumCache(file)) + if (md5sum) map += "md5" -> WriteSummary.parseChecksum(SummaryQScript.md5sumCache(file)) map.toMap } - +} +object WriteSummary { /** Retrive checksum from file */ def parseChecksum(checksumFile: File): String = { Source.fromFile(checksumFile).getLines().toList.head.split(" ")(0) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/CheckMd5.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/CheckMd5.scala new file mode 100644 index 000000000..b6a108642 --- /dev/null +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/CheckMd5.scala @@ -0,0 +1,33 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.summary.WriteSummary +import org.broadinstitute.gatk.queue.function.InProcessFunction +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * In-process function that compares the checksum parsed from md5file with the expected md5sum (ignoring case) and calls System.exit(1) when they differ + * + * Created by pjvanthof on 16/08/15. + */ +class CheckMd5 extends InProcessFunction { + @Input(required = true) + var inputFile: File = _ + + @Input(required = true) + var md5file: File = _ + + @Argument(required = true) + var md5sum: String = _ + + /** Logs an error and exits the JVM with status 1 when the checksum read from md5file does not match md5sum, ignoring case */ + def run: Unit = { + val outputMd5sum = WriteSummary.parseChecksum(md5file).toLowerCase + + if (outputMd5sum != md5sum.toLowerCase) { + logger.error(s"Input file: '$inputFile' md5sum is not as expected, aborting pipeline") + System.exit(1) + } + } +} \ No newline at end of file -- GitLab