From b674053a1c567172451e2dbc1769becbb53d1275 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@e-sensei.nl>
Date: Mon, 5 Oct 2015 15:37:44 +0200
Subject: [PATCH] Add Biopet extension wrapper; add skipNames option for a
 concise report containing only counts and taxon IDs

---
 .../extensions/tools/KrakenReportToJson.scala | 61 +++++++++++++++++++
 .../biopet/tools/KrakenReportToJson.scala     | 18 ++++--
 .../sasc/biopet/pipelines/gears/Gears.scala   | 17 ++++--
 3 files changed, 85 insertions(+), 11 deletions(-)
 create mode 100644 public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala

diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala
new file mode 100644
index 000000000..ff54170ba
--- /dev/null
+++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala
@@ -0,0 +1,61 @@
+package nl.lumc.sasc.biopet.extensions.tools
+
+/**
+ * Created by waiyileung on 05-10-15.
+ */
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.ToolCommandFuntion
+import nl.lumc.sasc.biopet.core.summary.Summarizable
+import nl.lumc.sasc.biopet.utils.ConfigUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{Argument, Output, Input}
+
+/**
+ * KrakenReportToJson function class for usage in Biopet pipelines
+ *
+ * @param root Configuration object for the pipeline
+ */
+class KrakenReportToJson(val root: Configurable) extends ToolCommandFuntion with Summarizable {
+  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson
+
+  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
+  var inputReport: File = null
+
+  @Argument(required = false)
+  var skipNames: Boolean = true
+
+  @Output(doc = "Output JSON", shortName = "output", required = true)
+  var output: File = null
+
+  override def defaultCoreMemory = 1.0
+
+  /** Tool arguments; skipNames is forwarded with an explicit value because the tool parses `--skipnames <bool>`. */
+  override def commandLine = super.commandLine + required("-i", inputReport) + required("-o", output) +
+    required("--skipnames", skipNames)
+
+  /** Reads the JSON in `output` and returns its "stats" entry, converted with ConfigUtils.any2map. */
+  def summaryStats: Map[String, Any] =
+    ConfigUtils.any2map(ConfigUtils.fileToConfigMap(output).getOrElse("stats", Map()))
+
+  /** This job contributes no files to the summary. */
+  def summaryFiles: Map[String, File] = Map()
+}
+
+object KrakenReportToJson {
+  /** Job writing `<input name minus its last extension>.kraken.json` into `output`; a name with no dot is kept whole. */
+  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = {
+    val name = input.getName
+    val stem = if (name.contains(".")) name.substring(0, name.lastIndexOf(".")) else name
+    val report = new KrakenReportToJson(root)
+    report.inputReport = input
+    report.output = new File(output, stem + ".kraken.json")
+    report
+  }
+
+  /** Same as the `File` variant, with the output directory given as a path string. */
+  def apply(root: Configurable, input: File, outDir: String): KrakenReportToJson =
+    apply(root, input, new File(outDir))
+}
+
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala
index 8985fbcf7..7d2989a8e 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala
@@ -56,7 +56,7 @@ object KrakenReportToJson extends ToolCommand {
   var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0)
   val spacePattern = "^( +)".r
 
-  case class Args(krakenreport: File = null, outputJson: Option[File] = None) extends AbstractArgs
+  case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = true) extends AbstractArgs
 
   class OptParser extends AbstractOptParser {
 
@@ -70,9 +70,15 @@ object KrakenReportToJson extends ToolCommand {
     } validate {
       x => if (x.exists) success else failure("Krakenreport not found")
     } text "Kraken report to generate stats from"
+
     opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) =>
       c.copy(outputJson = Some(x))
     } text "File to write output to, if not supplied output go to stdout"
+
+    opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) =>
+      c.copy(skipNames = x)
+    } text "Don't report the scientific name of the taxon."
+
   }
 
   /**
@@ -85,7 +91,7 @@ object KrakenReportToJson extends ToolCommand {
     .parse(args, Args())
     .getOrElse(sys.exit(1))
 
-  def parseLine(krakenRawHit: String): Map[Long, KrakenHit] = {
+  def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = {
     val values: Array[String] = krakenRawHit.stripLineEnd.split("\t")
     val scientificName: String = values(5)
     val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2
@@ -98,7 +104,7 @@ object KrakenReportToJson extends ToolCommand {
     Map(
       values(4).toLong -> new KrakenHit(
         taxonomyID = values(4).toLong,
-        taxonomyName = scientificName.trim,
+        taxonomyName = if (skipNames) "" else scientificName.trim,
         cladeCount = values(2).toLong,
         cladeSize = values(1).toLong,
         taxonRank = values(3),
@@ -108,7 +114,7 @@ object KrakenReportToJson extends ToolCommand {
       ))
   }
 
-  def reportToJson(reportRaw: File): String = {
+  def reportToJson(reportRaw: File, skipNames: Boolean): String = {
     val reader = Source.fromFile(reportRaw)
     //    val lines = reader.getLines().toList.filter(!_.isEmpty)
 
@@ -124,7 +130,7 @@ object KrakenReportToJson extends ToolCommand {
     * */
 
     val lines = reader.getLines()
-      .map(line => parseLine(line))
+      .map(line => parseLine(line, skipNames))
       .filter(p => p.head._2.cladeSize > 0)
       .foldLeft(Map.empty[Long, KrakenHit])((a, b) => {
         a + b.head
@@ -142,7 +148,7 @@ object KrakenReportToJson extends ToolCommand {
   def main(args: Array[String]): Unit = {
     val commandArgs: Args = parseArgs(args)
 
-    val jsonString: String = reportToJson(commandArgs.krakenreport)
+    val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames)
     commandArgs.outputJson match {
       case Some(file) => {
         val writer = new PrintWriter(file)
diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
index 4899906b3..caef8b4de 100644
--- a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
+++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala
@@ -20,16 +20,16 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
 import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
 import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
 import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
-import nl.lumc.sasc.biopet.extensions.tools.FastqSync
+import nl.lumc.sasc.biopet.extensions.tools.{KrakenReportToJson, FastqSync}
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import nl.lumc.sasc.biopet.tools.KrakenReportToJson
 import org.broadinstitute.gatk.queue.QScript
 
 /**
  * This is a trait for the Gears pipeline
  * The ShivaTrait is used as template for this pipeline
  */
-class Gears(val root: Configurable) extends QScript with SummaryQScript { qscript =>
+class Gears(val root: Configurable) extends QScript with SummaryQScript {
+  qscript =>
   def this() = this(null)
 
   @Input(shortName = "R1", required = false)
@@ -115,10 +115,16 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip
     add(krakenReport)
 
     val krakenReportJSON = new KrakenReportToJson(qscript)
-    krakenReportJSON.input = krakenReport.output
+    krakenReportJSON.inputReport = krakenAnalysis.output
     krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
+    krakenReportJSON.skipNames = config("skipNames", default = true)
     add(krakenReportJSON)
 
+//    val krakenReportJSON = new KrakenReportToJson(qscript)
+//    krakenReportJSON.input = krakenReport.output
+//    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
+//    add(krakenReportJSON)
+
     addSummaryJobs()
   }
 
@@ -129,7 +135,8 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip
   def summarySettings = Map()
 
   /** Files for the summary */
-  def summaryFiles = Map()
+  def summaryFiles = bamFile.map("input_bam" -> _).toMap ++ // an undefined input simply adds no entry
+    fastqFileR1.map("input_R1" -> _).toMap
 }
 
 /** This object give a default main method to the pipelines */
-- 
GitLab