BammetricsReport.scala 15.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12 13 14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, PrintWriter }
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.report.{ ReportBuilder, ReportBuilderExtension, ReportPage, ReportSection }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.rscript.{ LinePlot, StackedBarPlot }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
26

Peter van 't Hof's avatar
Peter van 't Hof committed
27
import scala.concurrent.{ Await, Future }
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import scala.concurrent.duration.Duration
29

Peter van 't Hof's avatar
Peter van 't Hof committed
30
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
31
  def builder = BammetricsReport
32
}
33

34
/**
Peter van 't Hof's avatar
Peter van 't Hof committed
35 36
 * Object to create a report for [[BamMetrics]]
 *
37 38 39
 * Created by pjvan_thof on 3/30/15.
 */
object BammetricsReport extends ReportBuilder {
40

41
  /** Name of report */
42 43
  val reportName = "Bam Metrics"

44 45
  def pipelineName = "bammetrics"

46
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
47 48
  def indexPage: Future[ReportPage] = Future {
    val bamMetricsPage = Await.result(this.bamMetricsPage(summary, sampleId, libId), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
49
    ReportPage(bamMetricsPage.subPages ::: List(
Peter van 't Hof's avatar
Peter van 't Hof committed
50 51
      "Versions" -> Future(ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
      )), Map())),
52
      "Files" -> filesPage(sampleId, libId)
Peter van 't Hof's avatar
Peter van 't Hof committed
53 54 55 56 57 58
    ), List(
      "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
    ) ::: bamMetricsPage.sections,
      Map()
    )
  }
59

60
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
61 62 63
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
64
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
Peter van 't Hof's avatar
Peter van 't Hof committed
65

Peter van 't Hof's avatar
Peter van 't Hof committed
66
    //val pipelineId: Int = summary.getPipelineId(runId, metricsTag).map(_.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68 69
    val wgsExecuted = summary.getStatsSize(runId, metricsTag, "wgs", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId, metricsTag, "rna", sample = sampleId.map(SampleId), library = libId.map(LibraryId)) >= 1
Peter van 't Hof's avatar
Peter van 't Hof committed
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71 72
    val insertsizeMetrics = summary.getStatKeys(runId, metricsTag, "CollectInsertSizeMetrics",
      sampleId.map(SampleId).getOrElse(NoSample), libId.map(LibraryId).getOrElse(NoLibrary), Map("metrics" -> List("metrics")))
Peter van 't Hof's avatar
Peter van 't Hof committed
73
      .exists(_._2.isDefined)
74

Peter van 't Hof's avatar
Peter van 't Hof committed
75 76
    val targetSettings = summary.getSettingKeys(runId, metricsTag, NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample), library = libId.map(LibraryId).getOrElse(NoLibrary),
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name")))
78
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
79 80
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
81 82 83 84 85
    ) match {
        case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
        case (_, Some(roi: List[_])) => roi.map(_.toString)
        case _ => Nil
      }
86 87

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
88
      if (targets.isEmpty) List()
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      else List("Targets" -> Future(ReportPage(
90
        List(),
91
        targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))),
Peter van 't Hof's avatar
Peter van 't Hof committed
92
        Map()))),
93
      List(
94 95 96
        "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp", Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp", Map("showPlot" -> true))) ++
97
        (if (insertsizeMetrics) List("Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true))
Peter van 't Hof's avatar
Peter van 't Hof committed
98 99
        )
        else Nil) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
Peter van 't Hof's avatar
Peter van 't Hof committed
100 101 102 103 104
          Map("showPlot" -> true)))
        else Nil) ++
        (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
          Map("showPlot" -> true)))
        else Nil),
105
      Map("metricsTag" -> metricsTag)
106 107 108
    )
  }

109 110
  /**
   * Generate a stackbar plot for alignment stats
Peter van 't Hof's avatar
Peter van 't Hof committed
111
   *
112 113 114 115 116 117
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
118 119
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
120
                           summary: SummaryDb,
121
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
122
                           sampleId: Option[Int] = None): Unit = {
123 124 125 126 127 128
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
129 130 131 132 133 134 135 136
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
Peter van 't Hof's avatar
Peter van 't Hof committed
137
      summary.getStatsForLibraries(runId, "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
138
        sampleId = sampleId, keyValues = statsPaths).map(x => (x._1._1, Some(x._1._2)) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
139 140
    } else summary.getStatsForSamples(runId, "bammetrics", "bamstats",
      sample = sampleId.map(SampleId), keyValues = statsPaths).map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
141

Peter van 't Hof's avatar
Peter van 't Hof committed
142
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
143 144
      val sampleName: String = summary.getSampleName(s).map(_.get)
      val libName: Option[String] = l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
145
      val sb = new StringBuffer()
Peter van 't Hof's avatar
Peter van 't Hof committed
146 147 148 149 150
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t") else sb.append(sampleName + "\t")
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
151
      sb.append((mapped - duplicates - secondary) + "\t")
152
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
153
      sb.append((total - mapped) + "\t")
154
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
155
      tsvWriter.println(sb.toString)
156 157 158 159 160 161 162 163
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
164
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
WIP  
Peter van 't Hof committed
165
    plot.title = Some("Aligned_reads")
166 167 168
    plot.runLocal()
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
169
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
170 171 172 173 174 175 176
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
                           yKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
177
                           xKey: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
178 179
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
180 181 182 183
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
184 185 186 187 188 189
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
      summary.getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
190
    } else summary.getStatsForSamples(runId, pipeline, module, sample = sampleId.map(SampleId), keyValues = statsPaths)
Peter van 't Hof's avatar
Peter van 't Hof committed
191 192
      .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))

Peter van 't Hof's avatar
Peter van 't Hof committed
193 194
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
195 196 197
        val sampleName = Await.result(summary.getSampleName(sample), Duration.Inf)
          .getOrElse(throw new IllegalStateException("Sample must be there"))
        val libraryName = library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
198 199
        Map(
          yKey -> map(yKey).getOrElse(Array()),
Peter van 't Hof's avatar
Peter van 't Hof committed
200
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
201
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
202 203 204 205 206 207 208 209
    }.toArray

    writeTableToTsv(tsvFile, mergeTables(tables, yKey), yKey)

    LinePlot(tsvFile, pngFile,
      xlabel = xlabel,
      ylabel = ylabel,
      title = title,
210
      hideLegend = results.size > 40,
Peter van 't Hof's avatar
Peter van 't Hof committed
211 212 213
      removeZero = removeZero).runLocal()
  }

214 215
  /**
   * Generate a line plot for insertsize
Peter van 't Hof's avatar
Peter van 't Hof committed
216
   *
217 218 219 220 221 222
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
223
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
224
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
225
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
226
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
227
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
228
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
229 230
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
231
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
232
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
233

Peter van 't Hof's avatar
Peter van 't Hof committed
234
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
235
      "insert_size", "count", "bammetrics", "CollectInsertSizeMetrics",
Peter van 't Hof's avatar
Peter van 't Hof committed
236
      "Insert size", "Reads", "Insert size")
Peter van 't Hof's avatar
Peter van 't Hof committed
237
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
238

239
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
240
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
241
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
242
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
243
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
244 245 246 247
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
248 249
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
250
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
251
      "mapping_quality", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
252
      "Mapping quality", "Reads", "Mapping quality")
253 254 255
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
256
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
257
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
258
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
259
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
260 261 262 263
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
264 265
    )

Peter van 't Hof's avatar
Peter van 't Hof committed
266
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
267
      "clipping", "count", "bammetrics", "bamstats",
Peter van 't Hof's avatar
Peter van 't Hof committed
268
      "Clipping", "Reads", "Clipping")
269 270
  }

271 272
  /**
   * Generate a line plot for wgs coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
273
   *
274 275 276 277 278 279
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
280
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
281
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
282
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
283
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
284
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
285 286
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
287 288
      "coverage" -> List("histogram", "coverage"),
      "count" -> List("histogram", "count")
Peter van 't Hof's avatar
Peter van 't Hof committed
289
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
290

Peter van 't Hof's avatar
Peter van 't Hof committed
291
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
292
      "coverage", "count", "bammetrics", "wgs",
Peter van 't Hof's avatar
Peter van 't Hof committed
293
      "Coverage", "Bases", "Whole genome coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
294
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
295 296

  /**
Peter van 't Hof's avatar
Peter van 't Hof committed
297
   * Generate a line plot for rna coverage
Peter van 't Hof's avatar
Peter van 't Hof committed
298
   *
Peter van 't Hof's avatar
Peter van 't Hof committed
299 300 301 302 303 304
   * @param outputDir OutputDir for the tsv and png file
   * @param prefix Prefix of the tsv and png file
   * @param summary Summary class
   * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
   * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
305 306
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
307
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
308
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
309 310 311 312 313
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "position" -> List("rna", "histogram", "normalized_position"),
      "count" -> List("rna", "histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
314
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
315

Peter van 't Hof's avatar
Peter van 't Hof committed
316
    writePlotFromSummary(outputDir, prefix, summary, libraryLevel, sampleId, libraryId, statsPaths,
Peter van 't Hof's avatar
Peter van 't Hof committed
317
      "coverage", "count", "bammetrics", "rna",
Peter van 't Hof's avatar
Peter van 't Hof committed
318
      "Relative position", "Coverage", "Rna coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
319 320
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
321 322
  def mergeTables(tables: Array[Map[String, Array[Any]]],
                  mergeColumn: String, defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
323 324 325 326 327 328 329
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
      columnKey -> keys.map(x => table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
330
    require(table.map(_._2.size).toList.distinct.size == 1, "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
331 332 333
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
334 335 336
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
337 338 339
    }
    writer.close()
  }
340
}