BammetricsReport.scala 18.2 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20 21 22 23 24 25
import nl.lumc.sasc.biopet.core.report.{
  ReportBuilder,
  ReportBuilderExtension,
  ReportPage,
  ReportSection
}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.ConfigUtils
27
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
31

32
import scala.concurrent.{Await, Future}
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import scala.concurrent.duration.Duration
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
36
  def builder = BammetricsReport
37
}
38

39
/**
40 41 42 43
  * Object to create a report for [[BamMetrics]]
  *
  * Created by pjvan_thof on 3/30/15.
  */
44
object BammetricsReport extends ReportBuilder {
45

46
  /** Name of report */
47 48
  val reportName = "Bam Metrics"

49 50
  def pipelineName = "bammetrics"

51
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
52 53
  def indexPage: Future[ReportPage] =
    bamMetricsPage(summary, sampleId, libId).map { bamMetricsPage =>
54 55 56 57 58 59 60 61 62 63 64 65 66
      ReportPage(
        bamMetricsPage.subPages ::: List(
          "Versions" -> Future(
            ReportPage(List(),
                       List("Executables" -> ReportSection(
                         "/nl/lumc/sasc/biopet/core/report/executables.ssp")),
                       Map())),
          "Files" -> filesPage(sampleId, libId)
        ),
        List(
          "Report" -> ReportSection(
            "/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
        ) ::: bamMetricsPage.sections,
Peter van 't Hof's avatar
Peter van 't Hof committed
67 68 69
        Map()
      )
    }
70

71
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
75
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
    val wgsExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "wgs",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "rna",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1

    val insertsizeMetrics = summary
      .getStatKeys(
        runId,
        metricsTag,
        "CollectInsertSizeMetrics",
        sampleId.map(SampleId).getOrElse(NoSample),
        libId.map(LibraryId).getOrElse(NoLibrary),
        Map("metrics" -> List("metrics"))
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      .exists(_._2.isDefined)
97

98 99 100 101 102 103 104 105
    val targetSettings = summary.getSettingKeys(
      runId,
      metricsTag,
      NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample),
      library = libId.map(LibraryId).getOrElse(NoLibrary),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name"))
    )
106
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    ) match {
110 111 112 113
      case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
      case (_, Some(roi: List[_])) => roi.map(_.toString)
      case _ => Nil
    }
114 115

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
116
      if (targets.isEmpty) List()
117 118 119 120 121 122 123 124 125
      else
        List(
          "Targets" -> Future.successful(
            ReportPage(
              List(),
              targets.map(t =>
                t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp",
                                   Map("target" -> Some(t)))),
              Map()))),
126
      List(
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
        "Summary" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp",
          Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp",
                                    Map("showPlot" -> true))
      ) ++
        (if (insertsizeMetrics)
           List(
             "Insert Size" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
               Map("showPlot" -> true)))
         else Nil) ++ (if (wgsExecuted)
                         List(
                           "Whole genome coverage" -> ReportSection(
                             "/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
                             Map("showPlot" -> true)))
                       else Nil) ++
        (if (rnaExecuted)
           List(
             "Rna coverage" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
               Map("showPlot" -> true)))
         else Nil),
152
      Map("metricsTag" -> metricsTag)
153 154 155
    )
  }

156
  /**
157 158 159 160 161 162 163 164
    * Generate a stackbar plot for alignment stats
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
165 166
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
                           summary: SummaryDb,
168
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
169
                           sampleId: Option[Int] = None): Unit = {
170 171 172 173 174 175
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
176 177 178 179 180 181 182 183
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
      summary
        .getStatsForLibraries(runId,
                              "bammetrics",
                              "bamstats",
                              sampleId = sampleId,
                              keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else
      summary
        .getStatsForSamples(runId,
                            "bammetrics",
                            "bamstats",
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      val sampleName: String = summary.getSampleName(s).map(_.get)
202 203
      val libName: Option[String] =
        l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
204
      val sb = new StringBuffer()
205 206
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t")
      else sb.append(sampleName + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
207 208 209 210
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
211
      sb.append((mapped - duplicates - secondary) + "\t")
212
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
213
      sb.append((total - mapped) + "\t")
214
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      tsvWriter.println(sb.toString)
216 217 218 219 220 221 222 223
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
224
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
WIP  
Peter van 't Hof committed
225
    plot.title = Some("Aligned_reads")
226 227 228
    plot.runLocal()
  }

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
  /**
    * This is a generic method to create plots
    * @param outputDir Outputdir of the plot
    * @param prefix Files will start with this name
    * @param summary Summary where the data is
    * @param libraryLevel If enabled the plots will show data per library
    * @param sampleId If set only this sample is shown
    * @param libraryId If set onlt this library is shown
    * @param statsPaths Paths in summary where the tables can be found
    * @param yKeyList Keys to search from, first has prio over second one
    * @param xKeyList Keys to search from, first has prio over second one
    * @param pipeline Query for the pipeline
    * @param module Query for the module
    * @param xlabel X label shown on the plot
    * @param ylabel Y label shown on the plot
    * @param title Title of the plot
    * @param removeZero
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
248 249 250 251 252 253
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
254 255
                           yKeyList: List[String],
                           xKeyList: List[String],
Peter van 't Hof's avatar
Peter van 't Hof committed
256 257
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260 261
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263 264 265
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
266 267 268 269 270 271 272 273 274 275 276 277 278
      summary
        .getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x =>
          (x._1._1, Some(x._1._2)) -> x._2.map(x =>
            x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else
      summary
        .getStatsForSamples(runId,
                            pipeline,
                            module,
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280 281
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
282 283
        val sampleName = Await
          .result(summary.getSampleName(sample), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
284
          .getOrElse(throw new IllegalStateException("Sample must be there"))
285 286
        val libraryName =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
287 288 289
        if (yKeyList.find(x => map.contains(x) && map(x).isDefined).isEmpty) {
          ""
        }
290 291
        val yKey = yKeyList.find(x => map.contains(x) && map(x).isDefined).get
        val xKey = xKeyList.find(x => map.contains(x) && map(x).isDefined).get
Peter van 't Hof's avatar
Peter van 't Hof committed
292
        Map(
293
          yKeyList.head -> map(yKey).getOrElse(Array()),
Peter van 't Hof's avatar
Peter van 't Hof committed
294
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
295
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
296 297
    }.toArray

298
    writeTableToTsv(tsvFile, mergeTables(tables, yKeyList.head), yKeyList.head)
Peter van 't Hof's avatar
Peter van 't Hof committed
299

300 301 302 303 304 305 306
    LinePlot(tsvFile,
             pngFile,
             xlabel = xlabel,
             ylabel = ylabel,
             title = title,
             hideLegend = results.size > 40,
             removeZero = removeZero).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
307 308
  }

309
  /**
310 311 312 313 314 315 316 317
    * Generate a line plot for insertsize
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
318
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
319
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
320
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
321
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
322
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
323
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
324 325
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
326
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
327
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
328

329 330 331 332 333 334 335 336
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
337 338
      "insert_size" :: Nil,
      "count" :: Nil,
339 340 341 342 343 344
      "bammetrics",
      "CollectInsertSizeMetrics",
      "Insert size",
      "Reads",
      "Insert size"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
345
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
346

347
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
348
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
349
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
350
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
351
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
352 353 354 355
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
356 357
    )

358 359 360 361 362 363 364 365
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
366 367
      "mapping_quality" :: Nil,
      "count" :: Nil,
368 369 370 371 372 373
      "bammetrics",
      "bamstats",
      "Mapping quality",
      "Reads",
      "Mapping quality"
    )
374 375 376
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
377
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
378
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
379
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
380
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
381 382 383 384
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
385 386
    )

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "clipping" :: Nil,
      "count" :: Nil,
      "bammetrics",
      "bamstats",
      "Clipping",
      "Reads",
      "Clipping"
    )
403 404
  }

405
  /**
406 407 408 409 410 411 412 413
    * Generate a line plot for wgs coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
414
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
415
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
416
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
417
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
418
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
419 420
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
421
      "coverage" -> List("histogram", "coverage"),
422 423
      "count" -> List("histogram", "count"),
      "high_quality_coverage_count" -> List("histogram", "high_quality_coverage_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
424
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
425

426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "coverage" :: Nil,
      "count" :: "high_quality_coverage_count" :: Nil,
      "bammetrics",
      "wgs",
      "Coverage",
      "Bases",
      "Whole genome coverage"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
442
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
443 444

  /**
445 446 447 448 449 450 451 452
    * Generate a line plot for rna coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
453 454
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
455
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
456
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
457 458 459
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
460 461
      "position" -> List("histogram", "normalized_position"),
      "count" -> List("histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
462
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
463

464 465 466 467 468 469 470 471
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
472 473
      "position" :: Nil,
      "count" :: Nil,
474 475 476 477 478 479
      "bammetrics",
      "rna",
      "Relative position",
      "Coverage",
      "Rna coverage"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
480 481
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
482
  def mergeTables(tables: Array[Map[String, Array[Any]]],
483 484
                  mergeColumn: String,
                  defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
485 486
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
487 488
      columnKey -> keys.map(x =>
        table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
Peter van 't Hof's avatar
Peter van 't Hof committed
489 490 491 492
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
493
    require(table.map(_._2.length).toList.distinct.size == 1,
494
            "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
495 496 497
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
498 499 500
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
501 502 503
    }
    writer.close()
  }
504
}