Commit 05bcfc19 authored by Peter van 't Hof
Browse files

Add biom to krona method

parent 30233a07
......@@ -11,7 +11,7 @@ import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable
import scala.xml.{PrettyPrinter, Node}
import scala.xml.{ PrettyPrinter, Node }
/**
* Created by pjvanthof on 04/12/15.
......@@ -122,12 +122,13 @@ object GearsKraken {
/**
 * Recursively builds one `<node>` element per entry of `map`.
 *
 * Each node is named after the entry's key and gets a `<size>` child holding one `<val>` per
 * element of `samples`; the value is looked up through `getValue` with the key "size" and falls
 * back to 0 when absent. The entry's value is cast to a nested mutable map and converted
 * recursively into the node's remaining children.
 *
 * NOTE(review): `samples` and `getValue` are defined outside this span. The span also appears to
 * contain both the pre- and post-reformat variants of several lines (`val node`, `val sizes`,
 * `val size`) as left behind by the diff view - confirm against the real file.
 *
 * @param map  taxonomy level to convert; every value is expected to be a `mutable.Map[String, Any]`
 * @param path keys of the ancestors of this level, outermost first
 * @return one XML node per entry of `map`
 */
def createNodes(map: mutable.Map[String, Any], path: List[String] = Nil): Seq[Node] = {
map.map {
case (k, v) =>
val node = <node name={k}></node>
val sizes = samples.map { sample => <val>
{getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0)}
</val>
val node = <node name={ k }></node>
val sizes = samples.map { sample =>
<val>
{ getValue(sample, (path ::: k :: Nil).tail, "size").getOrElse(0) }
</val>
}
val size = <size>{sizes}</size>
val size = <size>{ sizes }</size>
// Children are the <size> element followed by the nodes built from the nested map,
// with this key appended to the path.
node.copy(child = size ++ createNodes(v.asInstanceOf[mutable.Map[String, Any]], path ::: k :: Nil))
}.toSeq
}
......
package nl.lumc.sasc.biopet.pipelines.gears
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.{ BiopetQScript, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.Flash
import nl.lumc.sasc.biopet.extensions.qiime._
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.xml.{ PrettyPrinter, Elem }
/**
* Created by pjvan_thof on 12/4/15.
*/
......@@ -58,3 +65,74 @@ class GearsQiimeClosed(val root: Configurable) extends QScript with BiopetQScrip
_otuTable = closedReference.otuTable
}
}
object GearsQiimeClosed {

  /**
   * Converts a BIOM table, as read by `ConfigUtils.fileToConfigMap`, into a Krona XML file.
   *
   * The "rows" of the table are turned into a taxonomy tree (one tree level per taxonomy entry of
   * the form `level__name`; entries ending in `__` are skipped). The sparse "data" triples
   * `(row, column, value)` are then added to the tree node belonging to the row, per sample.
   * Finally a `<krona>` document is written with one `<dataset>` per sample (sorted by name) and
   * the taxonomy tree, where every node reports its own count plus the counts of all descendants.
   *
   * @param inputFile  BIOM file containing the "columns", "rows" and "data" sections
   * @param outputFile file the Krona XML is written to; an existing file is overwritten
   */
  def qiimeBiomToKrona(inputFile: File, outputFile: File): Unit = {
    val biom = ConfigUtils.fileToConfigMap(inputFile)

    // Sample ids in column order: the "data" triples refer to samples by this index.
    val samples = biom("columns").asInstanceOf[List[Map[String, Any]]].toArray.map(_("id"))
    // Datasets and <val> elements are both emitted in this sorted order so that they line up.
    val sortedSamples = samples.toList.map(_.toString).sorted

    /** Node of the taxonomy tree; `counts` holds the counts assigned directly to this node. */
    case class TaxNode(name: String, level: String) {
      val childs: ListBuffer[TaxNode] = ListBuffer()
      val counts: mutable.Map[String, Long] = mutable.Map()

      /** Count of this node for `sample`, including the counts of all descendants. */
      def totalCount(sample: String): Long = counts.getOrElse(sample, 0L) + childs.map(_.totalCount(sample)).sum

      override def toString() = s"$level:$name:$counts"

      /** XML element for this node: a `<size>` with one `<val>` per sample, then the child nodes. */
      def node: Elem = {
        val sizes = sortedSamples.map { sample => <val>{ totalCount(sample) }</val> }
        val size = <size>{ sizes }</size>
        val node = <node name={ name }>{ size }</node>
        node.copy(child = node.child ++ childs.map(_.node))
      }
    }

    val root = TaxNode("root", "-")

    // For every row, walk (and extend where needed) the tree along the row's taxonomy and keep
    // the deepest node, so that rows can be addressed by index when the counts are filled in.
    val taxs = biom("rows").asInstanceOf[List[Map[String, Any]]].toArray.map { row =>
      val taxonomy = row("metadata").asInstanceOf[Map[String, Any]]("taxonomy")
        .asInstanceOf[List[String]].filter(!_.endsWith("__"))
      taxonomy.foldLeft(root) { (parent, entry) =>
        // Entries are expected to look like "level__name".
        val parts = entry.split("__", 2)
        val level = parts(0)
        val name = parts(1)
        parent.childs.find(child => child.name == name && child.level == level).getOrElse {
          val child = TaxNode(name, level)
          parent.childs += child
          child
        }
      }
    }

    // Each data entry is a (row index, column index, value) triple; add the value to the
    // per-sample count of the row's taxonomy node.
    biom("data").asInstanceOf[List[List[Any]]].foreach { data =>
      val row = data(0).asInstanceOf[Long]
      val column = data(1).asInstanceOf[Long]
      val value = data(2).asInstanceOf[Long]
      val sample = samples(column.toInt).toString
      val leaf = taxs(row.toInt)
      leaf.counts += sample -> (value + leaf.counts.getOrElse(sample, 0L))
    }

    val xml = <krona>
                <attributes magnitude="size">
                  <attribute display="size">size</attribute>
                </attributes>
                <datasets>
                  { sortedSamples.map { sample => <dataset>{ sample }</dataset> } }
                </datasets>
              </krona>

    val writer = new PrintWriter(outputFile)
    // Close the writer even when building or formatting the document fails.
    try {
      val prettyXml = new PrettyPrinter(80, 2)
      writer.println(prettyXml.format(xml.copy(child = xml.child :+ root.node)))
    } finally {
      writer.close()
    }
  }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment