GearsKraken.scala 7.02 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.gears

17 18
import java.io.{ File, PrintWriter }

19 20
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
22
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
23
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
24
import nl.lumc.sasc.biopet.utils.ConfigUtils
25 26 27
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

28
import scala.collection.mutable
Peter van 't Hof's avatar
Peter van 't Hof committed
29
import scala.xml.{ PrettyPrinter, Node }
30

31
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
32 33
 * Created by pjvanthof on 04/12/15.
 */
34 35 36 37 38 39 40 41 42 43
class GearsKraken(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {

  var fastqR1: File = _

  var fastqR2: Option[File] = None

  var outputName: String = _

  def init(): Unit = {
    require(fastqR1 != null)
Peter van 't Hof's avatar
Peter van 't Hof committed
44
    require(outputName != null)
45 46
  }

47 48 49 50 51 52 53 54 55 56 57 58
  lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)

  protected def fastqToFasta(input: File): File = {
    val seqtk = new SeqtkSeq(this)
    seqtk.input = input
    seqtk.output = new File(outputDir, input.getName + ".fasta")
    seqtk.A = true
    seqtk.isIntermediate = true
    add(seqtk)
    seqtk.output
  }

59 60
  def biopetScript(): Unit = {
    // start kraken
61 62 63 64 65

    val (fqR1, fqR2) = if (krakenConvertToFasta)
      (fastqToFasta(fastqR1), fastqR2.map(fastqToFasta))
    else (fastqR1, fastqR2)

66
    val krakenAnalysis = new Kraken(this)
67
    krakenAnalysis.input = fqR1 :: fqR2.toList
68 69 70 71
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")

    krakenAnalysis.paired = fastqR2.isDefined

Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
72 73
    krakenAnalysis.classifiedOut = Some(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassifiedOut = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
74 75 76
    add(krakenAnalysis)

    outputFiles += ("kraken_output_raw" -> krakenAnalysis.output)
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
77 78
    outputFiles += ("kraken_classified_out" -> krakenAnalysis.classifiedOut.getOrElse(""))
    outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassifiedOut.getOrElse(""))
79 80 81 82

    // create kraken summary file
    val krakenReport = new KrakenReport(this)
    krakenReport.input = krakenAnalysis.output
Sander van der Zeeuw's avatar
Sander van der Zeeuw committed
83
    krakenReport.showZeros = true
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
    add(krakenReport)

    outputFiles += ("kraken_report_input" -> krakenReport.input)
    outputFiles += ("kraken_report_output" -> krakenReport.output)

    val krakenReportJSON = new KrakenReportToJson(this)
    krakenReportJSON.inputReport = krakenReport.output
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
    krakenReportJSON.skipNames = config("skipNames", default = false)
    add(krakenReportJSON)
    addSummarizable(krakenReportJSON, "krakenreport")

    outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport)
    outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output)

    addSummaryJobs()
  }

  /** Location of summary file */
  def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".kraken.summary.json")

  /** Pipeline settings shown in the summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
107
  def summarySettings: Map[String, Any] = Map()
108 109 110

  /** Statistics shown in the summary file */
  def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
111
    case Some(file) => Map("input_R2" -> file)
Peter van 't Hof's avatar
Peter van 't Hof committed
112
    case _          => Map()
113 114
  })
}
115 116

object GearsKraken {
Peter van 't Hof's avatar
Peter van 't Hof committed
117

118
  def convertKrakenJsonToKronaXml(files: Map[String, File], outputFile: File): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
119 120 121 122
    val summaries = files.map { case (k, v) => k -> ConfigUtils.fileToConfigMap(v) }
    convertKrakenSummariesToKronaXml(summaries, outputFile)
  }

123
  def convertKrakenSummariesToKronaXml(summaries: Map[String, Map[String, Any]], outputFile: File, totalReads: Option[Map[String, Long]] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
124 125

    val samples = summaries.keys.toList.sorted
126 127

    val taxs: mutable.Map[String, Any] = mutable.Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
128

129 130 131 132 133 134 135 136 137 138 139
    def addTax(map: Map[String, Any], path: List[String] = Nil): Unit = {
      val name = map("name").toString
      val x = path.foldLeft(taxs)((a, b) => if (a.contains(b)) a(b).asInstanceOf[mutable.Map[String, Any]] else {
        a += b -> mutable.Map[String, Any]()
        a(b).asInstanceOf[mutable.Map[String, Any]]
      })

      if (!x.contains(name)) x += name -> mutable.Map[String, Any]()

      map("children").asInstanceOf[List[Any]].foreach(x => addTax(x.asInstanceOf[Map[String, Any]], path ::: name :: Nil))
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
140 141

    summaries.foreach { x => addTax(x._2("classified").asInstanceOf[Map[String, Any]]) }
142 143

    def getValue(sample: String, path: List[String], key: String) = {
Peter van 't Hof's avatar
Peter van 't Hof committed
144
      path.foldLeft(summaries(sample)("classified").asInstanceOf[Map[String, Any]]) { (b, a) =>
145 146 147 148 149 150 151
        b.getOrElse("children", List[Map[String, Any]]())
          .asInstanceOf[List[Map[String, Any]]]
          .find(_.getOrElse("name", "") == a).getOrElse(Map[String, Any]())
      }.get(key)
    }

    def createNodes(map: mutable.Map[String, Any], path: List[String] = Nil): Seq[Node] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
      map.map {
153
        case (k, v) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
154 155
          val node = <node name={ k }></node>
          val sizes = samples.map { sample =>
Peter van 't Hof's avatar
Peter van 't Hof committed
156 157 158
            if (k == "root") {
              val unclassified = summaries(sample)("unclassified").asInstanceOf[Map[String, Any]]("size").asInstanceOf[Long]
              <val>
159
                { totalReads.flatMap(_.get(sample)).getOrElse(getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0).toString.toLong + unclassified) }
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161 162
              </val>
            } else {
              <val>
Peter van 't Hof's avatar
Peter van 't Hof committed
163
                { getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0) }
Peter van 't Hof's avatar
Peter van 't Hof committed
164 165
              </val>
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
166
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
167
          val size = <size>{ sizes }</size>
168
          node.copy(child = size ++ createNodes(v.asInstanceOf[mutable.Map[String, Any]], path ::: k :: Nil))
Peter van 't Hof's avatar
Peter van 't Hof committed
169
      }.toSeq
170
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
171

172 173 174 175 176
    val xml = <krona>
                <attributes magnitude="size">
                  <attribute display="size">size</attribute>
                </attributes>
                <datasets>
Peter van 't Hof's avatar
Peter van 't Hof committed
177
                  { samples.map { sample => <dataset>{ sample }</dataset> } }
178 179 180 181
                </datasets>
              </krona>

    val writer = new PrintWriter(outputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
182 183
    val prettyXml = new PrettyPrinter(80, 2)
    writer.println(prettyXml.format(xml.copy(child = xml.child ++ createNodes(taxs))))
184 185 186
    writer.close()
  }
}