BammetricsReport.scala 15.2 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.rscript.{ LinePlot, StackedBarPlot }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
26 27 28 29

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Await
import scala.concurrent.duration.Duration
30

Peter van 't Hof's avatar
Peter van 't Hof committed
31
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
32
  def builder = BammetricsReport
33
}
34

35
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37
 * Object to create a report for [[BamMetrics]]
 *
38 39 40
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
41

42
  /** Name of report */
43 44
  val reportName = "Bam Metrics"

45
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
46 47 48
  def indexPage = {
    val bamMetricsPage = this.bamMetricsPage(summary, sampleId, libId)
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
49 50
      "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map()),
Peter van 't Hof's avatar
Peter van 't Hof committed
51 52 53 54 55 56 57 58 59
      "Files" -> ReportPage(List(), List(
        "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
      ), Map())
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
60

61
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
62 63 64
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
65
                     metricsTag: String = "bammetrics") = {
Peter van 't Hof's avatar
Peter van 't Hof committed
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
    val wgsExecuted = summary.getStatsSize(runId, metricsTag, "wgs", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId, metricsTag, "rna", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
Peter van 't Hof's avatar
Peter van 't Hof committed
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72 73
    val insertsizeMetrics = summary.getStatKeys(runId, metricsTag, "CollectInsertSizeMetrics",
      sampleId.map(SampleId).getOrElse(NoSample), libId.map(LibraryId).getOrElse(NoLibrary), Map("metrics" -> List("metrics")))
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      .exists(_._2.isDefined)
75

Peter van 't Hof's avatar
Peter van 't Hof committed
76 77
    val targetSettings = summary.getSettingKeys(runId, metricsTag, NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample), library = libId.map(LibraryId).getOrElse(NoLibrary),
Peter van 't Hof's avatar
Peter van 't Hof committed
78
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))
79
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
80 81
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
82 83 84 85 86
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
87 88

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90
      if (targets.isEmpty) List()
      else List("Targets" -> ReportPage(
91
        List(),
92
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
93
        Map())),
94
      List(
95 96 97
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true))) ++
98
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
99 100
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
101 102 103 104 105
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
106
      Map("metricsTag" -> metricsTag)
107 108 109
    )
  }

110 111
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
112
   *
113 114 115 116 117 118
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
119 120
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
121
                           summary: SummaryDb,
122
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
123
                           sampleId: Option[Int] = None): Unit = {
124 125 126 127 128 129
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
130 131 132 133 134 135 136 137
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
138
      summary.getStatsForLibraries(runId, "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
139
        sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
140 141
    } else summary.getStatsForSamples(runId, "bammetrics", "bamstats",
      sample = sampleId.map(SampleId), keyValues = statsPaths).map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
142

Peter van 't Hof's avatar
Peter van 't Hof committed
143
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
144 145
      val sampleName: String = summary.getSampleName(s).map(_.get)
      val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
146
      val sb = new StringBuffer()
Peter van 't Hof's avatar
Peter van 't Hof committed
147 148 149 150 151
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t") else sb.append(sampleName + "\t")
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
152
      sb.append((mapped - duplicates - secondary) + "\t")
153
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
154
      sb.append((total - mapped) + "\t")
155
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
156
      tsvWriter.println(sb.toString)
157 158 159 160 161 162 163 164
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
165
    if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
166 167
      plot.width = Some(200 + (libraries.filter(s => sampleId.getOrElse(s.id) == s.id).size) * 10)
    } else plot.width = Some(200 + (samples.count(s => sampleId.getOrElse(s) == s) * 10))
Peter van 't Hof's avatar
Peter van 't Hof committed
168
    plot.title = Some("Aligned reads")
169 170 171
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
172
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
173 174 175 176 177 178 179
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           yKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
180
                           xKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
181 182
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
183 184 185 186
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
187 188 189 190 191 192
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
193
    } else summary.getStatsForSamples(runId, pipeline, module, sample = sampleId.map(SampleId), keyValues = statsPaths)
Peter van 't Hof's avatar
Peter van 't Hof committed
194 195
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

Peter van 't Hof's avatar
Peter van 't Hof committed
196 197
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
198 199 200
        val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libraryName = library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
201 202
        Map(
          yKey -> map(yKey).getOrElse(Array()),
Peter van 't Hof's avatar
Peter van 't Hof committed
203
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
204
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
205 206 207 208 209 210 211 212 213 214 215
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, yKey), yKey)

    LinePlot(tsvFile, pngFile,
      xlabel = xlabel,
      ylabel = ylabel,
      title = title,
      removeZero = removeZero).runLocal()
  }

216 217
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
218
   *
219 220 221 222 223 224
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
225
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
226
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
227
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
228
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
229
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
230
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
231 232
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
233
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
234
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
235

Peter van 't Hof's avatar
Peter van 't Hof committed
236
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
237
      "insert_size", "count", "bammetrics", "CollectInsertSizeMetrics",
Peter van 't Hof's avatar
Peter van 't Hof committed
238
      "Insert size", "Reads", "Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
239
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
240

241
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
242
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
243
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
244
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
245
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
246 247 248 249
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
250 251
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
252
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
253
      "mapping_quality", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
254
      "Mapping quality", "Reads", "Mapping quality")
255 256 257
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
258
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
259
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
260
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
261
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263 264 265
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
266 267
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
268
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
269
      "clipping", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
270
      "Clipping", "Reads", "Clipping")
271 272
  }

273 274
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
275
   *
276 277 278 279 280 281
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
282
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
283
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
284
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
285
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
286
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
287 288
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
289 290
      "coverage" -> List("histogram", "coverage"),
      "count" -> List("histogram", "count")
Peter van 't Hof's avatar
Peter van 't Hof committed
291
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
292

Peter van 't Hof's avatar
Peter van 't Hof committed
293
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
294
      "coverage", "count", "bammetrics", "wgs",
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      "Coverage", "Bases", "Whole genome coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
296
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
297 298

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
299
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
300
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
301 302 303 304 305 306
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
307 308
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
309
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
310
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
311 312 313 314 315
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "position" -> List("rna", "histogram", "normalized_position"),
      "count" -> List("rna", "histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
316
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
317

Peter van 't Hof's avatar
Peter van 't Hof committed
318
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
319
      "coverage", "count", "bammetrics", "rna",
Peter van 't Hof's avatar
Peter van 't Hof committed
320
      "Relative position", "Coverage", "Rna coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
321 322
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
323 324
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
325 326 327 328 329 330 331
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
332
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
333 334 335
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
336 337 338
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
339 340 341
    }
    writer.close()
  }
342
}