From 05bff3abb9908d56af9d351c6a8f021bbc7e457f Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Fri, 23 Oct 2015 19:46:58 +0200
Subject: [PATCH] Added GenotypeConcordance

---
 .../extensions/gatk/GenotypeConcordance.scala | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala

diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala
new file mode 100644
index 000000000..00a70f8fa
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala
@@ -0,0 +1,95 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.gatk
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.summary.Summarizable
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+import org.broadinstitute.gatk.utils.report.{GATKReportTable, GATKReport}
+
+/**
+ * Extension for CombineVariants from GATK
+ *
+ * Created by pjvan_thof on 2/26/15.
+ */
+class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable {
+  val analysisType = "GenotypeConcordance"
+
+  @Input(required = true)
+  var evalFile: File = null
+
+  @Input(required = true)
+  var compFile: File = null
+
+  @Output(required = true)
+  var outputFile: File = null
+
+  var moltenize = true
+
+  def summaryFiles = Map("output" -> outputFile)
+
+  def summaryStats = {
+    val report = new GATKReport(outputFile)
+    val compProportions = report.getTable("GenotypeConcordance_CompProportions")
+    val counts = report.getTable("GenotypeConcordance_Counts")
+    val evalProportions = report.getTable("GenotypeConcordance_EvalProportions")
+    val genotypeSummary = report.getTable("GenotypeConcordance_Summary")
+    val siteSummary = report.getTable("SiteConcordance_Summary")
+
+    val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString
+
+    def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> {
+      (for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield
+        s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap
+    }).toMap
+
+    Map(
+      "compProportions" -> getMap(compProportions, "Proportion"),
+      "counts" -> getMap(counts, "Count"),
+      "evalProportions" -> getMap(evalProportions, "Proportion"),
+      "genotypeSummary" -> samples.distinct.map(sample => {
+        val i = samples.indexOf(sample)
+        sample -> Map(
+          "Non-Reference_Discrepancy" -> genotypeSummary.get(i, "Non-Reference_Discrepancy"),
+          "Non-Reference_Sensitivity" -> genotypeSummary.get(i, "Non-Reference_Sensitivity"),
+          "Overall_Genotype_Concordance" -> genotypeSummary.get(i, "Overall_Genotype_Concordance")
+        )
+      }).toMap,
+      "siteSummary" -> Map(
+        "ALLELES_MATCH" -> siteSummary.get(0, "ALLELES_MATCH"),
+        "EVAL_SUPERSET_TRUTH" -> siteSummary.get(0, "EVAL_SUPERSET_TRUTH"),
+        "EVAL_SUBSET_TRUTH" -> siteSummary.get(0, "EVAL_SUBSET_TRUTH"),
+        "ALLELES_DO_NOT_MATCH" -> siteSummary.get(0, "ALLELES_DO_NOT_MATCH"),
+        "EVAL_ONLY" -> siteSummary.get(0, "EVAL_ONLY"),
+        "TRUTH_ONLY" -> siteSummary.get(0, "TRUTH_ONLY")
+      )
+    )
+  }
+
+  override def beforeGraph(): Unit = {
+    super.beforeGraph()
+    deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
+    deps = deps.distinct
+  }
+
+  override def cmdLine = super.cmdLine +
+    required("--eval", evalFile) +
+    required("--comp", compFile) +
+    required("-o", outputFile) +
+    conditional(moltenize, "--moltenize")
+}
-- 
GitLab