diff --git a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala index 7916c361e9efec6efdf957e74086f90e84cdbac0..650bfc3051187e4e28927cb7da1ffd19d8a3e268 100644 --- a/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala +++ b/public/gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsKraken.scala @@ -1,12 +1,18 @@ package nl.lumc.sasc.biopet.pipelines.gears +import java.io.{ File, PrintWriter } + import nl.lumc.sasc.biopet.core.SampleLibraryTag import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken } import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson +import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript +import scala.collection.mutable +import scala.xml.Node + /** * Created by pjvanthof on 04/12/15. 
object GearsKraken {

  /**
   * Merges the Kraken reports of several samples into one Krona XML file.
   *
   * Each input file is loaded with `ConfigUtils.fileToConfigMap` (presumably a JSON
   * parser -- confirm against ConfigUtils). Every loaded map must contain a
   * "classified" entry holding the root of a taxonomy tree in which each node has a
   * "name", optionally a "size" and optionally a list of "children".
   *
   * The output contains one `<dataset>` per sample and a `<node>` tree that is the
   * union of all sample trees. Every node carries one `<val>` per sample, in the same
   * order as the `<dataset>` elements; a sample that lacks the taxon gets 0.
   *
   * @param files      sample name -> Kraken report file of that sample
   * @param outputFile file the Krona XML is written to (overwritten if it exists)
   * @throws NoSuchElementException when a report has no "classified" entry or a
   *                                taxon has no "name"
   */
  def convertKrakenJsonToKronaXml(files: Map[String, File], outputFile: File): Unit = {

    val oriMap = files.map { case (sample, file) => sample -> ConfigUtils.fileToConfigMap(file) }

    // One fixed sample order shared by the <dataset> list and by every <val> list.
    // Building the <dataset> elements by mapping over the key *set* yields a
    // Set[Elem] whose iteration order is unrelated to the key order once it grows
    // beyond a handful of entries, which would attach values to the wrong sample.
    val samples: List[String] = oriMap.keys.toList

    /** Root taxon of the given sample. */
    def classified(sample: String): Map[String, Any] =
      oriMap(sample)("classified").asInstanceOf[Map[String, Any]]

    /** Child taxa of a node; a node without a "children" entry is treated as a leaf. */
    def children(tax: Map[String, Any]): List[Map[String, Any]] =
      tax.getOrElse("children", Nil).asInstanceOf[List[Map[String, Any]]]

    // Union of the taxonomy trees of all samples: taxon name -> nested map of the
    // same shape. Only the names are stored here, the sizes are looked up later.
    val taxs: mutable.Map[String, Any] = mutable.Map()

    /** Registers `map` and, recursively, all of its descendants below `path` in `taxs`. */
    def addTax(map: Map[String, Any], path: List[String] = Nil): Unit = {
      val name = map("name").toString
      // Walk down to the parent level, creating missing levels on the way.
      val parent = path.foldLeft(taxs) { (level, step) =>
        level.getOrElseUpdate(step, mutable.Map[String, Any]()).asInstanceOf[mutable.Map[String, Any]]
      }
      parent.getOrElseUpdate(name, mutable.Map[String, Any]())
      children(map).foreach(child => addTax(child, path ::: name :: Nil))
    }
    samples.foreach(sample => addTax(classified(sample)))

    /**
     * Looks up `key` on the taxon reached by following `path` (a list of child names)
     * from the root taxon of `sample`. Returns None when the taxon or the key is absent.
     */
    def getValue(sample: String, path: List[String], key: String): Option[Any] = {
      path.foldLeft(classified(sample)) { (tax, childName) =>
        children(tax)
          .find(_.getOrElse("name", "") == childName)
          .getOrElse(Map[String, Any]())
      }.get(key)
    }

    /** Converts one level of `taxs` (and everything below it) into Krona `<node>` elements. */
    def createNodes(map: mutable.Map[String, Any], path: List[String] = Nil): Seq[Node] = {
      (map.map {
        case (name, sub) =>
          val fullPath = path ::: name :: Nil
          // The first element of fullPath is the root taxon itself, which is where
          // getValue starts its descent, so it must not be part of the lookup path.
          val sizes = samples.map { sample =>
            <val>{ getValue(sample, fullPath.tail, "size").getOrElse(0) }</val>
          }
          val size = <size>{ sizes }</size>
          val node = <node name={ name }></node>
          node.copy(child = size ++ createNodes(sub.asInstanceOf[mutable.Map[String, Any]], fullPath))
      }).toSeq
    }

    val xml = <krona>
                <attributes magnitude="size">
                  <attribute display="size">size</attribute>
                </attributes>
                <datasets>
                  { samples.map { sample => <dataset>{ sample }</dataset> } }
                </datasets>
              </krona>

    // Render the complete document before touching the output file, so a failure
    // while building the tree does not leave a truncated file or an open handle.
    val document = xml.copy(child = xml.child ++ createNodes(taxs)).toString()

    val writer = new PrintWriter(outputFile)
    try writer.println(document)
    finally writer.close()
  }
}