From b674053a1c567172451e2dbc1769becbb53d1275 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@e-sensei.nl>
Date: Mon, 5 Oct 2015 15:37:44 +0200
Subject: [PATCH] Add extension wrapper for biopet, Allow to skipNames in reporting for concise report containing only counts and taxonID's

---
 .../extensions/tools/KrakenReportToJson.scala | 104 ++++++++++++++++++
 .../biopet/tools/KrakenReportToJson.scala     |  18 ++--
 .../sasc/biopet/pipelines/gears/Gears.scala   |  17 ++--
 3 files changed, 128 insertions(+), 11 deletions(-)
 create mode 100644 public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala

diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala
new file mode 100644
index 000000000..ff54170ba
--- /dev/null
+++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/KrakenReportToJson.scala
@@ -0,0 +1,104 @@
+package nl.lumc.sasc.biopet.extensions.tools
+
+/**
+ * Created by waiyileung on 05-10-15.
+ */
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.ToolCommandFuntion
+import nl.lumc.sasc.biopet.core.summary.Summarizable
+import nl.lumc.sasc.biopet.utils.ConfigUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{Argument, Output, Input}
+
+/**
+ * KrakenReportToJson function class for usage in Biopet pipelines
+ *
+ * Wraps the command line tool `nl.lumc.sasc.biopet.tools.KrakenReportToJson`.
+ *
+ * @param root Configuration object for the pipeline
+ */
+class KrakenReportToJson(val root: Configurable) extends ToolCommandFuntion with Summarizable {
+  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson
+
+  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
+  var inputReport: File = null
+
+  /** Forwarded to the tool's `-n` option ("Don't report the scientific name of the taxon."). */
+  @Argument(required = false)
+  var skipNames: Boolean = true
+
+  @Output(doc = "Output JSON", shortName = "output", required = true)
+  var output: File = null
+
+  override def defaultCoreMemory = 1.0
+
+  /**
+   * Command line for the wrapped tool.
+   *
+   * The tool declares `-n` as a Boolean option that takes a value, so skipNames is
+   * passed explicitly; without it the setting made here never reaches the tool.
+   */
+  override def commandLine = super.commandLine +
+    required("-i", inputReport) +
+    required("-o", output) +
+    required("-n", skipNames)
+
+  /** Reads `output` as a config map and converts its "stats" entry (empty map if missing). */
+  def summaryStats: Map[String, Any] = {
+    val map = ConfigUtils.fileToConfigMap(output)
+
+    ConfigUtils.any2map(map.getOrElse("stats", Map()))
+  }
+
+  /** This function registers no files in the summary. */
+  def summaryFiles: Map[String, File] = Map()
+
+}
+
+object KrakenReportToJson {
+  /** Suffix appended to the input base name to build the output file name. */
+  private val OutputSuffix = ".kraken.json"
+
+  /**
+   * Derives the output file name from the input file name.
+   *
+   * Everything from the last '.' onwards is dropped. A name without any '.' is kept
+   * whole; substring(0, -1) would throw a StringIndexOutOfBoundsException there.
+   */
+  private def outputName(input: File): String = {
+    val name = input.getName
+    val dot = name.lastIndexOf('.')
+    (if (dot < 0) name else name.substring(0, dot)) + OutputSuffix
+  }
+
+  /**
+   * Creates the function with its output placed inside a directory.
+   *
+   * @param root Configuration object for the pipeline
+   * @param input Kraken report to convert
+   * @param output Directory that receives the JSON file (not the JSON file itself)
+   */
+  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = {
+    val report = new KrakenReportToJson(root)
+    report.inputReport = input
+    report.output = new File(output, outputName(input))
+    report
+  }
+
+  /**
+   * Creates the function with its output placed inside a directory given as a path.
+   *
+   * @param root Configuration object for the pipeline
+   * @param input Kraken report to convert
+   * @param outDir Path of the directory that receives the JSON file
+   */
+  def apply(root: Configurable, input: File, outDir: String): KrakenReportToJson = {
+    val report = new KrakenReportToJson(root)
+    report.inputReport = input
+    report.output = new File(outDir, outputName(input))
+    report
+  }
+}
+
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala
b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala index 8985fbcf7..7d2989a8e 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/KrakenReportToJson.scala @@ -56,7 +56,7 @@ object KrakenReportToJson extends ToolCommand { var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0) val spacePattern = "^( +)".r - case class Args(krakenreport: File = null, outputJson: Option[File] = None) extends AbstractArgs + case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = true) extends AbstractArgs class OptParser extends AbstractOptParser { @@ -70,9 +70,15 @@ object KrakenReportToJson extends ToolCommand { } validate { x => if (x.exists) success else failure("Krakenreport not found") } text "Kraken report to generate stats from" + opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) => c.copy(outputJson = Some(x)) } text "File to write output to, if not supplied output go to stdout" + + opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) => + c.copy(skipNames = x) + } text "Don't report the scientific name of the taxon." 
+ } /** @@ -85,7 +91,7 @@ object KrakenReportToJson extends ToolCommand { .parse(args, Args()) .getOrElse(sys.exit(1)) - def parseLine(krakenRawHit: String): Map[Long, KrakenHit] = { + def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = { val values: Array[String] = krakenRawHit.stripLineEnd.split("\t") val scientificName: String = values(5) val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2 @@ -98,7 +104,7 @@ object KrakenReportToJson extends ToolCommand { Map( values(4).toLong -> new KrakenHit( taxonomyID = values(4).toLong, - taxonomyName = scientificName.trim, + taxonomyName = if (skipNames) "" else scientificName.trim, cladeCount = values(2).toLong, cladeSize = values(1).toLong, taxonRank = values(3), @@ -108,7 +114,7 @@ object KrakenReportToJson extends ToolCommand { )) } - def reportToJson(reportRaw: File): String = { + def reportToJson(reportRaw: File, skipNames: Boolean): String = { val reader = Source.fromFile(reportRaw) // val lines = reader.getLines().toList.filter(!_.isEmpty) @@ -124,7 +130,7 @@ object KrakenReportToJson extends ToolCommand { * */ val lines = reader.getLines() - .map(line => parseLine(line)) + .map(line => parseLine(line, skipNames)) .filter(p => p.head._2.cladeSize > 0) .foldLeft(Map.empty[Long, KrakenHit])((a, b) => { a + b.head @@ -142,7 +148,7 @@ object KrakenReportToJson extends ToolCommand { def main(args: Array[String]): Unit = { val commandArgs: Args = parseArgs(args) - val jsonString: String = reportToJson(commandArgs.krakenreport) + val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames) commandArgs.outputJson match { case Some(file) => { val writer = new PrintWriter(file) diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala index 4899906b3..caef8b4de 100644 --- 
a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/Gears.scala @@ -20,16 +20,16 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport } import nl.lumc.sasc.biopet.extensions.picard.SamToFastq import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView -import nl.lumc.sasc.biopet.extensions.tools.FastqSync +import nl.lumc.sasc.biopet.extensions.tools.{KrakenReportToJson, FastqSync} import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.tools.KrakenReportToJson import org.broadinstitute.gatk.queue.QScript /** * This is a trait for the Gears pipeline * The ShivaTrait is used as template for this pipeline */ -class Gears(val root: Configurable) extends QScript with SummaryQScript { qscript => +class Gears(val root: Configurable) extends QScript with SummaryQScript { + qscript => def this() = this(null) @Input(shortName = "R1", required = false) @@ -115,10 +115,16 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip add(krakenReport) val krakenReportJSON = new KrakenReportToJson(qscript) - krakenReportJSON.input = krakenReport.output + krakenReportJSON.inputReport = krakenAnalysis.output krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json") + krakenReportJSON.skipNames = config("skipNames", default = true) add(krakenReportJSON) +// val krakenReportJSON = new KrakenReportToJson(qscript) +// krakenReportJSON.input = krakenReport.output +// krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json") +// add(krakenReportJSON) + addSummaryJobs() } @@ -129,7 +135,8 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip def summarySettings = Map() /** Files for the summary */ - def summaryFiles = Map() + def summaryFiles = (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++ + 
(if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map()) } /** This object give a default main method to the pipelines */ -- GitLab