MultiSampleQScript.scala 8.05 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.core

18
19
import java.io.File

20
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
23
import org.broadinstitute.gatk.queue.QScript
24

Peter van 't Hof's avatar
Peter van 't Hof committed
25
/** This trait provides a structured way to build multi-sample pipelines */
26
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
27

28
  /** Sample ids to restrict the run to; when empty, addSamplesJobs processes every configured sample. */
  @Argument(fullName = "sample", shortName = "s", doc = "Only Sample", required = false)
  private[core] val onlySamples: List[String] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
30

31
  // Fails construction with an IllegalArgumentException when the global config has no "samples" key;
  // that key is read by `sampleIds` when the `samples` map is built.
  require(globalConfig.map.contains("samples"), "No Samples found in config")
Peter van 't Hof's avatar
Peter van 't Hof committed
32

Peter van 't Hof's avatar
Peter van 't Hof committed
33
  /** Sample class with basic functions built in */
34
  abstract class AbstractSample(val sampleId: String) extends Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    /** Overrules config of qscript with default sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
    // Config accessor created with this sample's id as its default sample.
    val config = new ConfigFunctions(defaultSample = sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
37

38
39
40
    /** Sample specific settings; this default implementation returns an empty map. */
    def summarySettings: Map[String, Any] = Map.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
41
    /** Library class with basic functions built in */
42
    abstract class AbstractLibrary(val libId: String) extends Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
43
      /** Overrules config of qscript with default sample and default library */
44
      // Config accessor created with this sample's id and this library's id as defaults.
      val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId)
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
      /**
       * Registers a summarizable on the qscript under this sample and this library.
       * The name overrules the one from qscript.
       */
      def addSummarizable(summarizable: Summarizable, name: String): Unit =
        qscript.addSummarizable(summarizable, name, Some(sampleId), Some(libId))

51
52
53
      /** Library specific settings; this default implementation returns an empty map. */
      def summarySettings: Map[String, Any] = Map.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
54
      /**
       * Adds the library jobs and registers this library as "pipeline" summarizable.
       *
       * An error is recorded through Logging.addError when the library id does not match
       * `nameRegex`. While `addJobs()` runs, the qscript-level sample/library state is set to
       * this library, which `configFullPath` reads to build its prefix.
       *
       * The state that was active before the call is restored afterwards, also when
       * `addJobs()` throws. Resetting to None instead would wipe the sample state when this
       * method is called from inside a sample's `addJobs()` (e.g. through `addPerLibJobs()`).
       */
      final def addAndTrackJobs(): Unit = {
        if (nameRegex.findFirstIn(libId).isEmpty)
          Logging.addError(s"Library '$libId' $nameError")
        val previousSample = currentSample
        val previousLib = currentLib
        currentSample = Some(sampleId)
        currentLib = Some(libId)
        try {
          addJobs()
          qscript.addSummarizable(this, "pipeline", Some(sampleId), Some(libId))
        } finally {
          currentLib = previousLib
          currentSample = previousSample
        }
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
66
      /** Creates a library file inside `libDir`, named `sampleId-libId` followed by the given suffix */
      def createFile(suffix: String): File = {
        val fileName = s"$sampleId-$libId$suffix"
        new File(libDir, fileName)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69

      /** Returns the library directory: `lib_` plus the library id, inside the sample directory */
      def libDir = new File(sampleDir, s"lib_$libId")
71

72
73
      // Value of the "groups" config key looked up for this sample and library; evaluated on first access.
      // NOTE(review): the conversion to List[String] presumably relies on an implicit conversion
      // of the config value that is not visible in this file - confirm.
      lazy val groups: List[String] = config("groups", sample = sampleId, library = libId)

Peter van 't Hof's avatar
Peter van 't Hof committed
74
      /** Function that adds the library jobs; called by `addAndTrackJobs()`. */
      protected def addJobs(): Unit
76
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
    /** Library type, need implementation in pipeline */
79
    // Abstract type bounded by AbstractLibrary; it is the value type of `libraries`
    // and the return type of `makeLibrary`.
    type Library <: AbstractLibrary
Peter van 't Hof's avatar
Peter van 't Hof committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    /** Stores all libraries of this sample, keyed by library id (one `makeLibrary` call per id in `libIds`) */
    val libraries: Map[String, Library] =
      (for (id <- libIds) yield id -> makeLibrary(id)).toMap
83

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
    /**
     * Gender of this sample, taken from the "gender" config value (compared case-insensitively).
     * Any value other than "male" or "female", including a missing value, yields Gender.Unknown.
     */
    lazy val gender = {
      val configured: Option[String] = config("gender", sample = sampleId, library = null)
      configured
        .map(_.toLowerCase)
        .collect {
          case "male"   => Gender.Male
          case "female" => Gender.Female
        }
        .getOrElse(Gender.Unknown)
    }

    /**
     * Sample id of the father of this sample, taken from the "father" config value.
     *
     * On first access the value is validated:
     *  - an error is recorded when the sample names itself as father;
     *  - an error is recorded when the father is a configured sample whose gender is not male;
     *  - a warning is logged when the father is not one of the configured samples.
     *
     * @return the configured father id, or None when no father is configured
     */
    lazy val father: Option[String] = {
      val configured: Option[String] = config("father", sample = sampleId, library = null)
      configured.foreach { fatherId =>
        if (sampleId == fatherId) Logging.addError(s"Father for $sampleId can not be itself")
        samples.get(fatherId) match {
          case Some(fatherSample) =>
            if (fatherSample.gender != Gender.Male)
              Logging.addError(s"Father of $sampleId is not a male")
          case None =>
            logger.warn(s"For sample '$sampleId' is father '$fatherId' not found in config")
        }
      }
      configured
    }

    /**
     * Sample id of the mother of this sample, taken from the "mother" config value.
     *
     * On first access the value is validated:
     *  - an error is recorded when the sample names itself as mother;
     *  - an error is recorded when the mother is a configured sample whose gender is not female;
     *  - a warning is logged when the mother is not one of the configured samples.
     *
     * @return the configured mother id, or None when no mother is configured
     */
    lazy val mother: Option[String] = {
      val configured: Option[String] = config("mother", sample = sampleId, library = null)
      configured.foreach { motherId =>
        if (sampleId == motherId) Logging.addError(s"mother for $sampleId can not be itself")
        samples.get(motherId) match {
          case Some(motherSample) =>
            if (motherSample.gender != Gender.Female)
              Logging.addError(s"Mother of $sampleId is not a female")
          case None =>
            logger.warn(s"For sample '$sampleId' is mother '$motherId' not found in config")
        }
      }
      configured
    }

    // Value of the "groups" config key looked up for this sample; evaluated on first access.
    // NOTE(review): `library = null` is passed explicitly, presumably to force a sample-level
    // lookup - confirm against ConfigFunctions.
    lazy val groups: List[String] = config("groups", sample = sampleId, library = null)

Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
119
120
121
    /**
     * Factory method for Library class
     * @param id LibraryId
     * @return Library class
     */
Peter van 't Hof's avatar
Peter van 't Hof committed
122
    def makeLibrary(id: String): Library
Peter van 't Hof's avatar
Peter van 't Hof committed
123

Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
    /**
     * Returns the set of library ids of this sample: the keys found in the global config
     * under samples -> sampleId -> libraries, or an empty set when that path is absent.
     */
    protected def libIds: Set[String] = {
      val librariesPath = List("samples", sampleId, "libraries")
      val librariesConfig = ConfigUtils.getMapFromPath(globalConfig.map, librariesPath)
      librariesConfig.map(_.keySet).getOrElse(Set.empty[String])
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
128

Peter van 't Hof's avatar
Peter van 't Hof committed
129
    /**
     * Registers a summarizable on the qscript under this sample.
     * The name overrules the one from qscript.
     */
    def addSummarizable(summarizable: Summarizable, name: String): Unit =
      qscript.addSummarizable(summarizable, name, Some(sampleId))

Peter van 't Hof's avatar
Peter van 't Hof committed
134
    /**
     * Adds the sample jobs and registers this sample as "pipeline" summarizable.
     *
     * An error is recorded through Logging.addError when the sample id does not match
     * `nameRegex`. While `addJobs()` runs, the qscript-level sample state is set to this
     * sample, which `configFullPath` reads to build its prefix. The state that was active
     * before the call is restored afterwards, also when `addJobs()` throws.
     */
    final def addAndTrackJobs(): Unit = {
      if (nameRegex.findFirstIn(sampleId).isEmpty)
        Logging.addError(s"Sample '$sampleId' $nameError")
      val previousSample = currentSample
      currentSample = Some(sampleId)
      try {
        addJobs()
        qscript.addSummarizable(this, "pipeline", Some(sampleId))
      } finally {
        currentSample = previousSample
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
143

Peter van 't Hof's avatar
Peter van 't Hof committed
144
    /** Function that adds the sample jobs; called by `addAndTrackJobs()`. */
    protected def addJobs(): Unit
Peter van 't Hof's avatar
Peter van 't Hof committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
    /** Adds the jobs of all libraries of this sample in one call */
    protected final def addPerLibJobs(): Unit =
      libraries.values.foreach(_.addAndTrackJobs())

154
    /** Creates a sample file inside `sampleDir`, named with the sample id followed by the given suffix */
    def createFile(suffix: String) = {
      val fileName = s"$sampleId$suffix"
      new File(sampleDir, fileName)
    }

    /** Returns the sample directory: `samples/<sampleId>` inside the output directory */
    def sampleDir = new File(outputDir, s"samples${File.separator}$sampleId")
159
160
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
161
  /** Sample type, need implementation in pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
162
163
  // Abstract type bounded by AbstractSample; it is the value type of `samples`
  // and the return type of `makeSample`.
  type Sample <: AbstractSample

Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
  /**
   * Factory method for Sample class
   * @param id SampleId
   * @return Sample class
   */
169
  def makeSample(id: String): Sample
Peter van 't Hof's avatar
Peter van 't Hof committed
170

Peter van 't Hof's avatar
Peter van 't Hof committed
171
172
  /** Stores all samples, keyed by sample id (one `makeSample` call per id in `sampleIds`) */
  val samples: Map[String, Sample] =
    (for (id <- sampleIds) yield id -> makeSample(id)).toMap
173

174
  /** Returns the set of all sample ids: the keys found under "samples" in the global config */
  protected def sampleIds: Set[String] = {
    val samplesConfig = ConfigUtils.any2map(globalConfig.map("samples"))
    samplesConfig.keySet
  }
176

Peter van 't Hof's avatar
Peter van 't Hof committed
177
  // Valid sample/library names: at least 3 characters, alphanumeric at both ends,
  // with alphanumerics, '-' or '_' in between.
  protected lazy val nameRegex =
    new scala.util.matching.Regex("""^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""")
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
181
  // Message appended to the "Sample '<id>'" / "Library '<id>'" prefix when an id fails `nameRegex`.
  // The leading space is kept so that callers concatenating it directly stay readable.
  protected lazy val nameError = " name invalid. " +
    "Name must have at least 3 characters, " +
    "must begin and end with an alphanumeric character, " +
    "and must not have whitespace."
Peter van 't Hof's avatar
Peter van 't Hof committed
182

Peter van 't Hof's avatar
Peter van 't Hof committed
183
  /**
   * Runs addAndTrackJobs for the selected samples.
   *
   * When `onlySamples` is empty, or contains the id of every configured sample, all samples
   * are processed and `addMultiSampleJobs()` is called afterwards. Otherwise only the samples
   * listed in `onlySamples` are processed, a warning is logged for every id that is not a
   * configured sample, and the multi-sample jobs are skipped.
   */
  final def addSamplesJobs(): Unit = {
    val runAllSamples = onlySamples.isEmpty || samples.keys.forall(onlySamples.contains)
    if (runAllSamples) {
      samples.values.foreach(_.addAndTrackJobs())
      addMultiSampleJobs()
    } else onlySamples.foreach { sampleId =>
      samples.get(sampleId) match {
        case Some(sample) => sample.addAndTrackJobs()
        case None         => logger.warn("sampleId '" + sampleId + "' not found")
      }
    }
  }

194
  /**
   * Method where the multi-sample jobs should be added. It is called by `addSamplesJobs()`
   * after the per-sample jobs, and only when all samples are processed: either no sample
   * argument is given, or the given ids cover every configured sample.
   */
  def addMultiSampleJobs(): Unit

Peter van 't Hof's avatar
Peter van 't Hof committed
199
  /** Stores sample state */
200
201
  // Id of the sample whose jobs are currently being added; set by the addAndTrackJobs
  // methods and read by configFullPath. None outside of those calls.
  private var currentSample: Option[String] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
202
203
204
205
  /**
   * Stores library state: the id of the library whose jobs are currently being added.
   * Set by the library's addAndTrackJobs and read by configFullPath; None otherwise.
   */
  private var currentLib: Option[String] = None

  /**
   * Prefixes the full config path with the sample and library for jobs that are created
   * in the current state: "samples", sample id when a sample is active, followed by
   * "libraries", library id when a library is active, followed by the inherited path.
   */
  override def configFullPath: List[String] = {
    val samplePrefix = currentSample.toList.flatMap(id => List("samples", id))
    val libraryPrefix = currentLib.toList.flatMap(id => List("libraries", id))
    samplePrefix ::: libraryPrefix ::: super.configFullPath
  }
217
}
218
219
220
221
222
223
224

/** Companion object holding the enumerations shared by multi-sample pipelines. */
object MultiSampleQScript {

  /** Gender of a sample; Unknown is used when the config gives no recognised value. */
  object Gender extends Enumeration {
    val Male = Value
    val Female = Value
    val Unknown = Value
  }

}