GearsQiimeClosed.scala 5.08 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
16
package nl.lumc.sasc.biopet.pipelines.gears

17
import java.io.{File, PrintWriter}
Peter van 't Hof's avatar
Peter van 't Hof committed
18

19
20
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.SampleLibraryTag
21
import nl.lumc.sasc.biopet.extensions.qiime._
22
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.ConfigUtils
24
25
26
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

Peter van 't Hof's avatar
Peter van 't Hof committed
27
28
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
29
import scala.xml.{Elem, PrettyPrinter}
Peter van 't Hof's avatar
Peter van 't Hof committed
30

31
/**
32
33
34
35
36
37
  * Created by pjvan_thof on 12/4/15.
  */
class GearsQiimeClosed(val parent: Configurable)
    extends QScript
    with SummaryQScript
    with SampleLibraryTag {
38

39
  var fastaInput: File = _
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def init(): Unit = {
42
    require(fastaInput != null)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
    require(sampleId.isDefined)
44
45
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
46
  private var _otuMap: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
47
  def otuMap: File = _otuMap
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49

  private var _otuTable: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
50
  def otuTable: File = _otuTable
Peter van 't Hof's avatar
Peter van 't Hof committed
51

Peter van 't Hof's avatar
Peter van 't Hof committed
52
  def biopetScript(): Unit = {
53
54

    val closedReference = new PickClosedReferenceOtus(this)
55
56
    closedReference.inputFasta =
      addDownsample(fastaInput, new File(outputDir, s"${sampleId.get}.downsample.fna"))
57
58
    closedReference.outputDir = new File(outputDir, "pick_closed_reference_otus")
    add(closedReference)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
    _otuMap = closedReference.otuMap
    _otuTable = closedReference.otuTable
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62

    addSummaryJobs()
63
  }
64
65
66
67
68

  /** Must return a map with used settings for this pipeline */
  def summarySettings: Map[String, Any] = Map()

  /** File to put in the summary for thie pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def summaryFiles: Map[String, File] = Map("otu_table" -> otuTable, "otu_map" -> otuMap)
70

71
72
  val downSample: Option[Double] = config("downsample")

73
  def addDownsample(input: File, output: File): File = {
74
75
76
77
78
    downSample match {
      case Some(x) =>
        val seqtk = new SeqtkSample(this)
        seqtk.input = input
        seqtk.sample = x
79
80
        seqtk.output = output
        add(seqtk)
81
82
83
84
        output
      case _ => input
    }
  }
85
}
Peter van 't Hof's avatar
Peter van 't Hof committed
86
87
88
89
90
91
92
93
94
95
96
97
98

object GearsQiimeClosed {
  def qiimeBiomToKrona(inputFile: File, outputFile: File): Unit = {
    val biom = ConfigUtils.fileToConfigMap(inputFile)

    val samples = biom("columns").asInstanceOf[List[Map[String, Any]]].toArray.map(_("id"))

    val sortedSamples = samples.toList.map(_.toString).sorted

    case class TaxNode(name: String, level: String) {
      val childs: ListBuffer[TaxNode] = ListBuffer()

      val counts: mutable.Map[String, Long] = mutable.Map()
99
100
      def totalCount(sample: String): Long =
        counts.getOrElse(sample, 0L) + childs.map(_.totalCount(sample)).sum
Peter van 't Hof's avatar
Peter van 't Hof committed
101
102

      def node: Elem = {
103
104
105
        val sizes = sortedSamples.map { sample =>
          <val>{ totalCount(sample) }</val>
        }
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
110
111
112
113
114
115
116
        val size = <size>{ sizes }</size>

        val node = <node name={ name }>{ size }</node>

        node.copy(child = node.child ++ childs.map(_.node))
      }
    }

    val root = TaxNode("root", "-")

    val taxs = biom("rows").asInstanceOf[List[Map[String, Any]]].toArray.map { row =>
117
118
119
120
      val taxonomy = row("metadata")
        .asInstanceOf[Map[String, Any]]("taxonomy")
        .asInstanceOf[List[String]]
        .filter(!_.endsWith("__"))
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
      taxonomy.foldLeft(root) { (a, b) =>
        val n = b.split("__", 2)
        val level = n(0)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
        val name = if (level == "Unassigned") "Unassigned" else n(1)
125
        a.childs.find(_ == TaxNode(name, level)) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
126
127
128
129
130
131
132
133
134
          case Some(node) => node
          case _ =>
            val node = TaxNode(name, level)
            a.childs += node
            node
        }
      }
    }

135
136
137
    biom("data")
      .asInstanceOf[List[List[Any]]]
      .map { data =>
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        val row = data.head.asInstanceOf[Long]
139
140
141
142
143
144
145
        val column = data(1).asInstanceOf[Long]
        val value = data(2).asInstanceOf[Long]
        val sample = samples(column.toInt).toString
        taxs(row.toInt).counts += sample -> (value + taxs(row.toInt).counts.getOrElse(sample, 0L))
        value
      }
      .sum
Peter van 't Hof's avatar
Peter van 't Hof committed
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

    val xml = <krona>
                <attributes magnitude="size">
                  <attribute display="size">size</attribute>
                </attributes>
                <datasets>
                  { sortedSamples.map { sample => <dataset>{ sample }</dataset> } }
                </datasets>
              </krona>

    val writer = new PrintWriter(outputFile)
    val prettyXml = new PrettyPrinter(80, 2)
    writer.println(prettyXml.format(xml.copy(child = xml.child :+ root.node)))
    writer.close()
  }
}