GearsKraken.scala 6.37 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.pipelines.gears

3
4
import java.io.{ File, PrintWriter }

5
6
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
8
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
9
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
10
import nl.lumc.sasc.biopet.utils.ConfigUtils
11
12
13
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

14
import scala.collection.mutable
Peter van 't Hof's avatar
Peter van 't Hof committed
15
import scala.xml.{ PrettyPrinter, Node }
16

17
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
 * Created by pjvanthof on 04/12/15.
 */
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class GearsKraken(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {

  var fastqR1: File = _

  var fastqR2: Option[File] = None

  var outputName: String = _

  def init(): Unit = {
    require(fastqR1 != null)
    if (outputName == null) outputName = fastqR1.getName
      .stripSuffix(".gz")
      .stripSuffix(".fq")
      .stripSuffix(".fastq")
  }

36
37
38
39
40
41
42
43
44
45
46
47
  lazy val krakenConvertToFasta: Boolean = config("kraken_discard_quality", default = false)

  protected def fastqToFasta(input: File): File = {
    val seqtk = new SeqtkSeq(this)
    seqtk.input = input
    seqtk.output = new File(outputDir, input.getName + ".fasta")
    seqtk.A = true
    seqtk.isIntermediate = true
    add(seqtk)
    seqtk.output
  }

48
49
  def biopetScript(): Unit = {
    // start kraken
50
51
52
53
54

    val (fqR1, fqR2) = if (krakenConvertToFasta)
      (fastqToFasta(fastqR1), fastqR2.map(fastqToFasta))
    else (fastqR1, fastqR2)

55
    val krakenAnalysis = new Kraken(this)
56
    krakenAnalysis.input = fqR1 :: fqR2.toList
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")

    krakenAnalysis.paired = fastqR2.isDefined

    krakenAnalysis.classified_out = Some(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
    add(krakenAnalysis)

    outputFiles += ("kraken_output_raw" -> krakenAnalysis.output)
    outputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse(""))
    outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse(""))

    // create kraken summary file
    val krakenReport = new KrakenReport(this)
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
    add(krakenReport)

    outputFiles += ("kraken_report_input" -> krakenReport.input)
    outputFiles += ("kraken_report_output" -> krakenReport.output)

    val krakenReportJSON = new KrakenReportToJson(this)
    krakenReportJSON.inputReport = krakenReport.output
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
    krakenReportJSON.skipNames = config("skipNames", default = false)
    add(krakenReportJSON)
    addSummarizable(krakenReportJSON, "krakenreport")

    outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport)
    outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output)

    addSummaryJobs()
  }

  /** Location of summary file */
  def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".kraken.summary.json")

  /** Pipeline settings shown in the summary file */
  def summarySettings: Map[String, Any] = Map.empty

  /** Statistics shown in the summary file */
  def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
Peter van 't Hof's avatar
Peter van 't Hof committed
100
    case Some(file) => Map("input_R2" -> file)
Peter van 't Hof's avatar
Peter van 't Hof committed
101
    case _          => Map()
102
103
  })
}
104
105

object GearsKraken {
Peter van 't Hof's avatar
Peter van 't Hof committed
106

107
  def convertKrakenJsonToKronaXml(files: Map[String, File], outputFile: File): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
108
109
110
111
112
113
114
    val summaries = files.map { case (k, v) => k -> ConfigUtils.fileToConfigMap(v) }
    convertKrakenSummariesToKronaXml(summaries, outputFile)
  }

  def convertKrakenSummariesToKronaXml(summaries: Map[String, Map[String, Any]], outputFile: File): Unit = {

    val samples = summaries.keys.toList.sorted
115
116

    val taxs: mutable.Map[String, Any] = mutable.Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
117

118
119
120
121
122
123
124
125
126
127
128
    def addTax(map: Map[String, Any], path: List[String] = Nil): Unit = {
      val name = map("name").toString
      val x = path.foldLeft(taxs)((a, b) => if (a.contains(b)) a(b).asInstanceOf[mutable.Map[String, Any]] else {
        a += b -> mutable.Map[String, Any]()
        a(b).asInstanceOf[mutable.Map[String, Any]]
      })

      if (!x.contains(name)) x += name -> mutable.Map[String, Any]()

      map("children").asInstanceOf[List[Any]].foreach(x => addTax(x.asInstanceOf[Map[String, Any]], path ::: name :: Nil))
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
129
130

    summaries.foreach { x => addTax(x._2("classified").asInstanceOf[Map[String, Any]]) }
131
132

    def getValue(sample: String, path: List[String], key: String) = {
Peter van 't Hof's avatar
Peter van 't Hof committed
133
      path.foldLeft(summaries(sample)("classified").asInstanceOf[Map[String, Any]]) { (b, a) =>
134
135
136
137
138
139
140
        b.getOrElse("children", List[Map[String, Any]]())
          .asInstanceOf[List[Map[String, Any]]]
          .find(_.getOrElse("name", "") == a).getOrElse(Map[String, Any]())
      }.get(key)
    }

    def createNodes(map: mutable.Map[String, Any], path: List[String] = Nil): Seq[Node] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
141
      map.map {
142
        case (k, v) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
          val node = <node name={ k }></node>
          val sizes = samples.map { sample =>
Peter van 't Hof's avatar
Peter van 't Hof committed
145
146
147
            if (k == "root") {
              val unclassified = summaries(sample)("unclassified").asInstanceOf[Map[String, Any]]("size").asInstanceOf[Long]
              <val>
Peter van 't Hof's avatar
Peter van 't Hof committed
148
                { getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0).toString.toLong + unclassified }
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150
151
              </val>
            } else {
              <val>
Peter van 't Hof's avatar
Peter van 't Hof committed
152
                { getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0) }
Peter van 't Hof's avatar
Peter van 't Hof committed
153
154
              </val>
            }
Peter van 't Hof's avatar
Peter van 't Hof committed
155
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
156
          val size = <size>{ sizes }</size>
157
          node.copy(child = size ++ createNodes(v.asInstanceOf[mutable.Map[String, Any]], path ::: k :: Nil))
Peter van 't Hof's avatar
Peter van 't Hof committed
158
      }.toSeq
159
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
160

161
162
163
164
165
    val xml = <krona>
                <attributes magnitude="size">
                  <attribute display="size">size</attribute>
                </attributes>
                <datasets>
Peter van 't Hof's avatar
Peter van 't Hof committed
166
                  { samples.map { sample => <dataset>{ sample }</dataset> } }
167
168
169
170
                </datasets>
              </krona>

    val writer = new PrintWriter(outputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
    val prettyXml = new PrettyPrinter(80, 2)
    writer.println(prettyXml.format(xml.copy(child = xml.child ++ createNodes(taxs))))
173
174
175
    writer.close()
  }
}