BammetricsReport.scala 15.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.rscript.{ LinePlot, StackedBarPlot }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
import scala.concurrent.{ Await, Future }
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import scala.concurrent.duration.Duration
29

Peter van 't Hof's avatar
Peter van 't Hof committed
30
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
31
  def builder = BammetricsReport
32
}
33

34
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
35 36
 * Object to create a report for [[BamMetrics]]
 *
37 38 39
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
40

41
  /** Name of report */
42 43
  val reportName = "Bam Metrics"

44
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
45 46
  def indexPage: Future[ReportPage] = Future {
    val bamMetricsPage = Await.result(this.bamMetricsPage(summary, sampleId, libId), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
47
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
48 49 50
      "Versions" -> Future(ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map())),
      "Files" -> Future(ReportPage(List(), List(), Map()))
Peter van 't Hof's avatar
Peter van 't Hof committed
51 52 53 54 55 56
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
57

58
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
59 60 61
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
62
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
Peter van 't Hof's avatar
Peter van 't Hof committed
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66 67
    val wgsExecuted = summary.getStatsSize(runId, metricsTag, "wgs", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId, metricsTag, "rna", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
Peter van 't Hof's avatar
Peter van 't Hof committed
68

Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
    val insertsizeMetrics = summary.getStatKeys(runId, metricsTag, "CollectInsertSizeMetrics",
      sampleId.map(SampleId).getOrElse(NoSample), libId.map(LibraryId).getOrElse(NoLibrary), Map("metrics" -> List("metrics")))
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      .exists(_._2.isDefined)
72

Peter van 't Hof's avatar
Peter van 't Hof committed
73 74
    val targetSettings = summary.getSettingKeys(runId, metricsTag, NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample), library = libId.map(LibraryId).getOrElse(NoLibrary),
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))
76
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
77 78
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80 81 82 83
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
84 85

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
86
      if (targets.isEmpty) List()
Peter van 't Hof's avatar
Peter van 't Hof committed
87
      else List("Targets" -> Future(ReportPage(
88
        List(),
89
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
90
        Map()))),
91
      List(
92 93 94
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true))) ++
95
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
96 97
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
98 99 100 101 102
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
103
      Map("metricsTag" -> metricsTag)
104 105 106
    )
  }

107 108
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
109
   *
110 111 112 113 114 115
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
116 117
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
118
                           summary: SummaryDb,
119
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
120
                           sampleId: Option[Int] = None): Unit = {
121 122 123 124 125 126
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
127 128 129 130 131 132 133 134
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
135
      summary.getStatsForLibraries(runId, "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
136
        sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
137 138
    } else summary.getStatsForSamples(runId, "bammetrics", "bamstats",
      sample = sampleId.map(SampleId), keyValues = statsPaths).map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
139

Peter van 't Hof's avatar
Peter van 't Hof committed
140
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
141 142
      val sampleName: String = summary.getSampleName(s).map(_.get)
      val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
143
      val sb = new StringBuffer()
Peter van 't Hof's avatar
Peter van 't Hof committed
144 145 146 147 148
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t") else sb.append(sampleName + "\t")
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
149
      sb.append((mapped - duplicates - secondary) + "\t")
150
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
151
      sb.append((total - mapped) + "\t")
152
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
153
      tsvWriter.println(sb.toString)
154 155 156 157 158 159 160 161
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
162
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
WIP  
Peter van 't Hof committed
163
    plot.title = Some("Aligned_reads")
164 165 166
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
167
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
168 169 170 171 172 173 174
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           yKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
175
                           xKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
176 177
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
178 179 180 181
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
182 183 184 185 186 187
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
188
    } else summary.getStatsForSamples(runId, pipeline, module, sample = sampleId.map(SampleId), keyValues = statsPaths)
Peter van 't Hof's avatar
Peter van 't Hof committed
189 190
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

Peter van 't Hof's avatar
Peter van 't Hof committed
191 192
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
193 194 195
        val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libraryName = library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
196 197
        Map(
          yKey -> map(yKey).getOrElse(Array()),
Peter van 't Hof's avatar
Peter van 't Hof committed
198
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
199
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
200 201 202 203 204 205 206 207
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, yKey), yKey)

    LinePlot(tsvFile, pngFile,
      xlabel = xlabel,
      ylabel = ylabel,
      title = title,
208
      hideLegend = results.size > 40,
Peter van 't Hof's avatar
Peter van 't Hof committed
209 210 211
      removeZero = removeZero).runLocal()
  }

212 213
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
214
   *
215 216 217 218 219 220
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
221
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
222
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
223
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
224
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
225
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
226
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
227 228
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
229
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
230
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
231

Peter van 't Hof's avatar
Peter van 't Hof committed
232
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
233
      "insert_size", "count", "bammetrics", "CollectInsertSizeMetrics",
Peter van 't Hof's avatar
Peter van 't Hof committed
234
      "Insert size", "Reads", "Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
235
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
236

237
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
238
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
239
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
240
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
241
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
242 243 244 245
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
246 247
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
248
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
249
      "mapping_quality", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
250
      "Mapping quality", "Reads", "Mapping quality")
251 252 253
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
254
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
255
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
256
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
257
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260 261
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
262 263
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
264
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
265
      "clipping", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
266
      "Clipping", "Reads", "Clipping")
267 268
  }

269 270
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
271
   *
272 273 274 275 276 277
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
278
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
279
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
280
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
281
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
282
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
283 284
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
285 286
      "coverage" -> List("histogram", "coverage"),
      "count" -> List("histogram", "count")
Peter van 't Hof's avatar
Peter van 't Hof committed
287
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
288

Peter van 't Hof's avatar
Peter van 't Hof committed
289
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
290
      "coverage", "count", "bammetrics", "wgs",
Peter van 't Hof's avatar
Peter van 't Hof committed
291
      "Coverage", "Bases", "Whole genome coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
292
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
293 294

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
295
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
296
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
297 298 299 300 301 302
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
303 304
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
305
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
306
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
307 308 309 310 311
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "position" -> List("rna", "histogram", "normalized_position"),
      "count" -> List("rna", "histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
312
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
313

Peter van 't Hof's avatar
Peter van 't Hof committed
314
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
315
      "coverage", "count", "bammetrics", "rna",
Peter van 't Hof's avatar
Peter van 't Hof committed
316
      "Relative position", "Coverage", "Rna coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
317 318
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
319 320
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
321 322 323 324 325 326 327
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
328
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
329 330 331
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
332 333 334
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
335 336 337
    }
    writer.close()
  }
338
}