Gears: name-sort unmapped reads before SamToFastq and expose pipeline outputs in the summary

What this patch does
- Kraken: versionRegex is anchored at the start of the version output and no
  longer requires a newline plus trailing text after the version token.
- BiopetExecutablePublic: appends nl.lumc.sasc.biopet.pipelines.gears.Gears to
  the pipeline list that is closed just before `def pipelines`.
- Gears:
  * keeps the unmapped BAM (isIntermediate = false), sorts it by queryname with
    SortSam, and feeds the sorted BAM to SamToFastq, which now also writes a
    singleton fastq (fastqUnpaired);
  * introduces GearsOutputFiles and appends it to summaryFiles;
  * summarySettings now reports the input bam / R1 fastq.

Review fixes folded into this version of the patch
- `GearsOutputFiles ++ Map(...)` only built a new immutable Map and discarded
  it, so GearsOutputFiles stayed empty. All registrations now use `++=`.
- unclassified_out is assigned an Option in this file (classified_out is
  presumed to be an Option too -- confirm against Kraken.scala), so those two
  entries are added only when the Option is defined instead of being paired
  into the Map[String, File] as Options.
- The kraken_report_json_* registrations are placed before addSummaryJobs() so
  they are already present if that call reads summaryFiles (not visible in this
  patch -- confirm).
- Import brace spacing and `samToFastq.input =` follow the surrounding style;
  the summaryFiles doc comment describes files rather than statistics.
- NOTE: the post-image blob hash on the Gears.scala `index` line is the one
  from the original patch and no longer matches the edited content.

diff --git a/public/biopet-extentsions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala b/public/biopet-extentsions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala
index ae475e2ca9fe24775af0d017ca7df42e1905cd09..dfe6f59d68b37411da199441a2d33fb891454e2a 100644
--- a/public/biopet-extentsions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala
+++ b/public/biopet-extentsions/src/main/scala/nl/lumc/sasc/biopet/extensions/kraken/Kraken.scala
@@ -51,7 +51,7 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction {
   var paired: Boolean = config("paired", default = false)
 
   executable = config("exe", default = "kraken")
-  override def versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r
+  override def versionRegex = """^Kraken version ([\d\w\-\.]+)""".r
   override def versionExitcode = List(0, 1)
   override def versionCommand = executable + " --version"
 
diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
index 76815745d80694cbad14245044f37dc676e621a0..946922ec7075938e124a659ba1f5a3acf40a5dd1 100644
--- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
+++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala
@@ -27,7 +27,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
     nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
     nl.lumc.sasc.biopet.pipelines.carp.Carp,
     nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
-    nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling
+    nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling,
+    nl.lumc.sasc.biopet.pipelines.gears.Gears
   )
 
   def pipelines: List[MainCommand] = List(
diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
index 6278de048657cbe21b55acf3aee8507c71ec105c..59151902fb31fa6d870522ff202977bd7e3dd14a 100644
--- a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
+++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.gears
 import nl.lumc.sasc.biopet.core.PipelineCommand
 import nl.lumc.sasc.biopet.core.summary.SummaryQScript
 import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
-import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
+import nl.lumc.sasc.biopet.extensions.picard.{ SortSam, SamToFastq }
 import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
 import nl.lumc.sasc.biopet.extensions.tools.{ KrakenReportToJson, FastqSync }
 import nl.lumc.sasc.biopet.utils.config.Configurable
@@ -44,6 +44,8 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
   @Argument(required = false)
   var outputName: String = _
 
+  var GearsOutputFiles: Map[String, File] = Map.empty
+
   /** Executed before running the script */
   def init(): Unit = {
     require(fastqFileR1.isDefined || bamFile.isDefined, "Must define fastq file(s) or a bam file")
@@ -69,14 +71,22 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
       samFilterUnmapped.input = bamfile
       samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped")
       samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
-      samFilterUnmapped.isIntermediate = true
+      samFilterUnmapped.isIntermediate = false
       add(samFilterUnmapped)
 
+      val samNameSort = new SortSam(qscript)
+      samNameSort.input = samFilterUnmapped.output
+      samNameSort.output = new File(outputDir, s"$outputName.unmapped.nsort.bam")
+      samNameSort.sortOrder = "queryname"
+      samNameSort.isIntermediate = false
+      add(samNameSort)
+
       // start bam to fastq (only on unaligned reads) also extract the matesam
-      val samToFastq = SamToFastq(qscript, samFilterUnmapped.output,
-        new File(outputDir, s"$outputName.unmapped.R1.fq.gz"),
-        new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
-      )
+      val samToFastq = new SamToFastq(qscript)
+      samToFastq.input = samNameSort.output
+      samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
+      samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
+      samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz")
       samToFastq.isIntermediate = true
       add(samToFastq)
 
@@ -92,6 +102,10 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
       fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats.json")
       add(fastqSync)
 
+      GearsOutputFiles ++= Map("fastqsync_stats" -> fastqSync.outputStats)
+      GearsOutputFiles ++= Map("fastqsync_R1" -> fastqSync.outputFastq1)
+      GearsOutputFiles ++= Map("fastqsync_R2" -> fastqSync.outputFastq2)
+
       List(fastqSync.outputFastq1, fastqSync.outputFastq2)
     }.getOrElse(List(fastqFileR1, fastqFileR2).flatten)
 
@@ -106,6 +120,10 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
     krakenAnalysis.unclassified_out = Option(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
     add(krakenAnalysis)
 
+    GearsOutputFiles ++= Map("kraken_output_raw" -> krakenAnalysis.output)
+    GearsOutputFiles ++= krakenAnalysis.classified_out.map("kraken_classified_out" -> _)
+    GearsOutputFiles ++= krakenAnalysis.unclassified_out.map("kraken_unclassified_out" -> _)
+
     // create kraken summary file
 
     val krakenReport = new KrakenReport(qscript)
@@ -114,6 +132,9 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
     krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
     add(krakenReport)
 
+    GearsOutputFiles ++= Map("kraken_report_input" -> krakenReport.input)
+    GearsOutputFiles ++= Map("kraken_report_output" -> krakenReport.output)
+
     val krakenReportJSON = new KrakenReportToJson(qscript)
     krakenReportJSON.inputReport = krakenAnalysis.output
     krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
@@ -121,18 +142,24 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript {
     add(krakenReportJSON)
 
+    GearsOutputFiles ++= Map("kraken_report_json_input" -> krakenReportJSON.inputReport)
+    GearsOutputFiles ++= Map("kraken_report_json_output" -> krakenReportJSON.output)
+
     addSummaryJobs()
   }
 
   /** Location of summary file */
   def summaryFile = new File(outputDir, "gears.summary.json")
 
-  /** Settings of pipeline for summary */
-  def summarySettings = Map()
+  /** Pipeline settings shown in the summary file */
+  def summarySettings: Map[String, Any] = Map.empty ++
+    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
+    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
 
-  /** Files for the summary */
+  /** Files listed in the summary file */
   def summaryFiles: Map[String, File] = Map.empty ++
     (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
-    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
+    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map()) ++
+    GearsOutputFiles
 }
 
 /** This object give a default main method to the pipelines */