Seqstat.scala 2.55 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
18
19
20
21
22
23
package nl.lumc.sasc.biopet.extensions

/*
 * Wrapper around the fastq-seqstat command-line tool (seqstat, implemented in D).
 */

import argonaut._, Argonaut._
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.ConfigUtils
26
27
import scalaz._, Scalaz._
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import scala.collection.mutable
29
30
31
32
33
34

import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
35
/**
 * Command-line wrapper that runs the seqstat executable on a FastQ file and
 * makes the JSON it writes available through the [[Summarizable]] methods.
 *
 * @param root configurable supplied by the caller and stored as this function's `root`
 */
class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Summarizable {
  // NOTE(review): presumably the default virtual-memory request for the job — confirm
  // against BiopetCommandLineFunction.
  override val defaultVmem = "4G"

  @Input(doc = "Input FastQ", required = true)
  var input: File = _

  @Output(doc = "JSON summary", required = true)
  var output: File = _

  // The executable is looked up under the "exe" config key, defaulting to "fastq-seqstat".
  executable = config("exe", default = "fastq-seqstat")

  /** Command line: the executable followed by the input, with " > " redirecting into the output file. */
  def cmdLine = required(executable) + required(input) + " > " + required(output)

  /**
   * Reads the output file through `ConfigUtils.fileToConfigMap` and returns its
   * "stats" entry converted with `ConfigUtils.any2map`.
   *
   * @return the "stats" section of the output; an empty map is passed to the
   *         conversion when the key is absent
   */
  def summaryStats: Map[String, Any] = {
    val parsedOutput = ConfigUtils.fileToConfigMap(output)

    ConfigUtils.any2map(parsedOutput.getOrElse("stats", Map()))
  }

  /** This wrapper contributes no files to the summary. */
  def summaryFiles: Map[String, File] = Map()

  /**
   * Combines two values that were reported under the same summary key.
   *
   * For two `Int` values: "len_min" keeps the smaller, "len_max" keeps the
   * larger, and every other key is summed. Any other combination of types
   * keeps the first value.
   *
   * @param v1 first value
   * @param v2 second value
   * @param key summary key both values belong to
   * @return the merged value
   */
  override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
    // Pattern variables get their own names so they no longer shadow the parameters.
    (v1, v2) match {
      case (a: Int, b: Int) if key == "len_min" => math.min(a, b)
      case (a: Int, b: Int) if key == "len_max" => math.max(a, b)
      // NOTE(review): Int addition wraps silently on overflow — confirm counts stay below Int.MaxValue.
      case (a: Int, b: Int)                     => a + b
      // NOTE(review): non-Int numbers (e.g. Long, Double) also end up here and are not merged — confirm intended.
      case _                                    => v1
    }
  }
}

object Seqstat {
  /**
   * Builds a [[Seqstat]] whose output file is derived from the input file name.
   *
   * The output name is the input name with everything from its last "." onwards
   * replaced by ".seqstats.json". A name without any "." is used in full
   * instead of failing on the substring call.
   *
   * @param root configurable handed to the new [[Seqstat]]
   * @param fastqfile FastQ file to analyse; becomes the job's input
   * @param outDir output location, prepended to the file name by plain string
   *               concatenation — it therefore has to end with a path separator
   *               already if it is meant to be a directory
   * @return the configured [[Seqstat]]
   */
  def apply(root: Configurable, fastqfile: File, outDir: String): Seqstat = {
    val seqstat = new Seqstat(root)
    val fileName = fastqfile.getName
    // lastIndexOf yields -1 when the name has no ".", and substring(0, -1) would throw.
    val baseName = fileName.lastIndexOf(".") match {
      case -1       => fileName
      case dotIndex => fileName.substring(0, dotIndex)
    }
    seqstat.input = fastqfile
    seqstat.output = new File(outDir + baseName + ".seqstats.json")
    seqstat
  }
}