Skip to content
Snippets Groups Projects
Commit b674053a authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Add extension wrapper for biopet, Allow to skipNames in reporting for concise...

Add extension wrapper for biopet, Allow to skipNames in reporting for concise report containing only counts and taxonID's
parent 6f58b7e2
No related branches found
No related tags found
No related merge requests found
package nl.lumc.sasc.biopet.extensions.tools
/**
* Created by waiyileung on 05-10-15.
*/
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFuntion
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Output, Input}
/**
 * KrakenReportToJson function class for usage in Biopet pipelines.
 *
 * Wraps the `nl.lumc.sasc.biopet.tools.KrakenReportToJson` tool as a command line job and
 * exposes the "stats" section of the produced JSON file as summary statistics.
 *
 * @param root Configuration object for the pipeline
 */
class KrakenReportToJson(val root: Configurable) extends ToolCommandFuntion with Summarizable {
  /** Tool object whose main method is executed by this job. */
  def toolObject = nl.lumc.sasc.biopet.tools.KrakenReportToJson

  @Input(doc = "Input Kraken Full report", shortName = "inputReport", required = true)
  var inputReport: File = null

  /** When true the scientific taxon names are left out of the JSON output. */
  @Argument(required = false)
  var skipNames: Boolean = true

  @Output(doc = "Output JSON", shortName = "output", required = true)
  var output: File = null

  override def defaultCoreMemory = 1.0

  /**
   * Builds the tool command line.
   *
   * The tool declares `-n` / `--skipnames` as a Boolean option that takes a value, so the
   * value of [[skipNames]] is always passed explicitly; otherwise setting it to `false` here
   * would silently have no effect on the generated job.
   */
  override def commandLine = super.commandLine +
    required("-i", inputReport) +
    required("-o", output) +
    required("-n", skipNames)

  /** Reads the output JSON and returns its "stats" entry (an empty map when absent). */
  def summaryStats: Map[String, Any] = {
    val map = ConfigUtils.fileToConfigMap(output)
    ConfigUtils.any2map(map.getOrElse("stats", Map()))
  }

  /** This job does not register any files in the summary. */
  def summaryFiles: Map[String, File] = Map()
}
/** Factory methods that create a [[KrakenReportToJson]] job with a derived output file name. */
object KrakenReportToJson {
  /**
   * Derives the JSON file name for a report: the input name without its last extension,
   * followed by ".kraken.json".
   *
   * A name without any '.' is used unchanged instead of failing with a
   * StringIndexOutOfBoundsException (lastIndexOf returns -1 in that case).
   */
  private def jsonName(input: File): String = {
    val name = input.getName
    val dot = name.lastIndexOf(".")
    val base = if (dot < 0) name else name.substring(0, dot)
    base + ".kraken.json"
  }

  /**
   * Creates a job writing its JSON into the given directory.
   *
   * @param root   Configuration object for the pipeline
   * @param input  Kraken report to convert
   * @param output Directory in which the JSON file is created
   */
  def apply(root: Configurable, input: File, output: File): KrakenReportToJson = {
    val report = new KrakenReportToJson(root)
    report.inputReport = input
    report.output = new File(output, jsonName(input))
    report
  }

  /**
   * Creates a job writing its JSON into the given directory.
   *
   * @param root   Configuration object for the pipeline
   * @param input  Kraken report to convert
   * @param outDir Path of the directory in which the JSON file is created
   */
  def apply(root: Configurable, input: File, outDir: String): KrakenReportToJson = {
    val report = new KrakenReportToJson(root)
    report.inputReport = input
    report.output = new File(outDir, jsonName(input))
    report
  }
}
......@@ -56,7 +56,7 @@ object KrakenReportToJson extends ToolCommand {
var cladeIDs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill(32)(0)
val spacePattern = "^( +)".r
case class Args(krakenreport: File = null, outputJson: Option[File] = None) extends AbstractArgs
case class Args(krakenreport: File = null, outputJson: Option[File] = None, skipNames: Boolean = true) extends AbstractArgs
class OptParser extends AbstractOptParser {
......@@ -70,9 +70,15 @@ object KrakenReportToJson extends ToolCommand {
} validate {
x => if (x.exists) success else failure("Krakenreport not found")
} text "Kraken report to generate stats from"
opt[File]('o', "output") unbounded () valueName "<json>" action { (x, c) =>
c.copy(outputJson = Some(x))
} text "File to write output to, if not supplied output go to stdout"
opt[Boolean]('n', "skipnames") unbounded () valueName "<skipnames>" action { (x, c) =>
c.copy(skipNames = x)
} text "Don't report the scientific name of the taxon."
}
/**
......@@ -85,7 +91,7 @@ object KrakenReportToJson extends ToolCommand {
.parse(args, Args())
.getOrElse(sys.exit(1))
def parseLine(krakenRawHit: String): Map[Long, KrakenHit] = {
def parseLine(krakenRawHit: String, skipNames: Boolean): Map[Long, KrakenHit] = {
val values: Array[String] = krakenRawHit.stripLineEnd.split("\t")
val scientificName: String = values(5)
val cladeLevel = spacePattern.findFirstIn(scientificName).getOrElse("").length / 2
......@@ -98,7 +104,7 @@ object KrakenReportToJson extends ToolCommand {
Map(
values(4).toLong -> new KrakenHit(
taxonomyID = values(4).toLong,
taxonomyName = scientificName.trim,
taxonomyName = if (skipNames) "" else scientificName.trim,
cladeCount = values(2).toLong,
cladeSize = values(1).toLong,
taxonRank = values(3),
......@@ -108,7 +114,7 @@ object KrakenReportToJson extends ToolCommand {
))
}
def reportToJson(reportRaw: File): String = {
def reportToJson(reportRaw: File, skipNames: Boolean): String = {
val reader = Source.fromFile(reportRaw)
// val lines = reader.getLines().toList.filter(!_.isEmpty)
......@@ -124,7 +130,7 @@ object KrakenReportToJson extends ToolCommand {
* */
val lines = reader.getLines()
.map(line => parseLine(line))
.map(line => parseLine(line, skipNames))
.filter(p => p.head._2.cladeSize > 0)
.foldLeft(Map.empty[Long, KrakenHit])((a, b) => {
a + b.head
......@@ -142,7 +148,7 @@ object KrakenReportToJson extends ToolCommand {
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
val jsonString: String = reportToJson(commandArgs.krakenreport)
val jsonString: String = reportToJson(commandArgs.krakenreport, skipNames = commandArgs.skipNames)
commandArgs.outputJson match {
case Some(file) => {
val writer = new PrintWriter(file)
......
......@@ -20,16 +20,16 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
import nl.lumc.sasc.biopet.extensions.tools.FastqSync
import nl.lumc.sasc.biopet.extensions.tools.{KrakenReportToJson, FastqSync}
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.tools.KrakenReportToJson
import org.broadinstitute.gatk.queue.QScript
/**
* This is the class for the Gears pipeline
* The ShivaTrait is used as template for this pipeline
*/
class Gears(val root: Configurable) extends QScript with SummaryQScript { qscript =>
class Gears(val root: Configurable) extends QScript with SummaryQScript {
qscript =>
def this() = this(null)
@Input(shortName = "R1", required = false)
......@@ -115,10 +115,16 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip
add(krakenReport)
val krakenReportJSON = new KrakenReportToJson(qscript)
krakenReportJSON.input = krakenReport.output
krakenReportJSON.inputReport = krakenAnalysis.output
krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
krakenReportJSON.skipNames = config("skipNames", default = true)
add(krakenReportJSON)
// val krakenReportJSON = new KrakenReportToJson(qscript)
// krakenReportJSON.input = krakenReport.output
// krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
// add(krakenReportJSON)
addSummaryJobs()
}
......@@ -129,7 +135,8 @@ class Gears(val root: Configurable) extends QScript with SummaryQScript { qscrip
def summarySettings = Map()
/** Files for the summary */
def summaryFiles = Map()
def summaryFiles = (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
(if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
}
/** This object gives a default main method to the pipeline */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment