MultiSampleQScript.scala 8.15 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
17
package nl.lumc.sasc.biopet.core

18
19
import java.io.File

20
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
23
import org.broadinstitute.gatk.queue.QScript
24

Peter van 't Hof's avatar
Peter van 't Hof committed
25
/** This trait provides a structured way to build multi-sample pipelines */
26
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
27

28
  /** Sample ids given with the optional "sample" / "s" argument; defaults to an empty list. */
  @Argument(fullName = "sample", shortName = "s", doc = "Only Sample", required = false)
  private[core] val onlySamples: List[String] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
30

31
  // Evaluated in the trait body, so an instance cannot be constructed when the global config
  // has no "samples" key.
  require(globalConfig.map.contains("samples"), "No Samples found in config")
Peter van 't Hof's avatar
Peter van 't Hof committed
32

Peter van 't Hof's avatar
Peter van 't Hof committed
33
  /** Sample class with basic functions built in */
34
  abstract class AbstractSample(val sampleId: String) extends Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
35
    /** Overrules config of qscript with default sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
36
    val config = new ConfigFunctions(defaultSample = sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
37

38
39
40
    /** Settings specific to this sample; empty unless overridden. */
    def summarySettings: Map[String, Any] = Map.empty[String, Any]

Peter van 't Hof's avatar
Peter van 't Hof committed
41
    /** Library class with basic functions built in */
42
    abstract class AbstractLibrary(val libId: String) extends Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
43
      /** Overrules config of qscript with default sample and default library */
44
      val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId)
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
      /**
       * Registers a summarizable on the qscript, tagged with this sample and this library.
       * The given name overrules the one from the qscript.
       */
      def addSummarizable(summarizable: Summarizable, name: String): Unit = {
        val sampleTag = Some(sampleId)
        val libraryTag = Some(libId)
        qscript.addSummarizable(summarizable, name, sampleTag, libraryTag)
      }

51
52
53
      /** Settings specific to this library; empty unless overridden. */
      def summarySettings: Map[String, Any] = Map.empty[String, Any]

Peter van 't Hof's avatar
Peter van 't Hof committed
54
      /**
       * Adds the library jobs.
       *
       * Reports an error through [[Logging]] when the library id does not match `nameRegex`,
       * then runs `addJobs()` with the current sample/library state set to this library and
       * registers this library as the "pipeline" summarizable.
       */
      final def addAndTrackJobs(): Unit = {
        if (nameRegex.findFirstIn(libId).isEmpty)
          Logging.addError(s"Library '$libId' $nameError")
        // Remember the state this method was entered with. When it is called from inside a
        // sample's addJobs() (e.g. through addPerLibJobs()), resetting to None would drop the
        // sample prefix from configFullPath for every job the sample adds afterwards.
        val previousSample = currentSample
        val previousLib = currentLib
        currentSample = Some(sampleId)
        currentLib = Some(libId)
        addJobs()
        qscript.addSummarizable(this, "pipeline", Some(sampleId), Some(libId))
        currentLib = previousLib
        currentSample = previousSample
      }

Peter van 't Hof's avatar
Peter van 't Hof committed
66
      /** Creates a file in the library directory named "<sampleId>-<libId><suffix>". */
      def createFile(suffix: String): File = {
        val fileName = s"$sampleId-$libId$suffix"
        new File(libDir, fileName)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69

      /** Returns the library directory: "lib_<libId>" inside the sample directory. */
      def libDir: File = new File(sampleDir, s"lib_$libId")
71

72
73
      lazy val groups: List[String] = config("groups", sample = sampleId, library = libId)

Peter van 't Hof's avatar
Peter van 't Hof committed
74
      /** Adds the jobs for this library; called by addAndTrackJobs(). */
      protected def addJobs(): Unit
76
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
77

Peter van 't Hof's avatar
Peter van 't Hof committed
78
    /** Library type, need implementation in pipeline */
79
    type Library <: AbstractLibrary
Peter van 't Hof's avatar
Peter van 't Hof committed
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
    /** All libraries of this sample, keyed by library id; built with makeLibrary. */
    val libraries: Map[String, Library] =
      (for (id <- libIds) yield (id, makeLibrary(id))).toMap
83

84
85
86
87
88
    /**
     * Gender of this sample, read from the "gender" config value of the sample.
     *
     * "male" and "female" are matched case-insensitively. Any other configured value is
     * logged as a warning and mapped to [[Gender.Unknown]]; a missing value maps to
     * [[Gender.Unknown]] without a warning.
     */
    lazy val gender: Gender.Value = {
      val g: Option[String] = config("gender", sample = sampleId, library = null)
      g match {
        case Some(value) =>
          value.toLowerCase match {
            case "male"   => Gender.Male
            case "female" => Gender.Female
            case _ =>
              // Report the configured value itself, not the wrapping Option.
              logger.warn(s"Could not convert '$value' to a gender")
              Gender.Unknown
          }
        case None => Gender.Unknown
      }
    }

    /**
     * Sample id of the father, read from the "father" config value of the sample.
     *
     * On first access the value is validated: an error is registered when the sample names
     * itself as father or when the referenced sample is not male; a warning is logged when
     * the referenced sample is not present in `samples`.
     */
    lazy val father: Option[String] = {
      val g: Option[String] = config("father", sample = sampleId, library = null)
      g.foreach { fatherId =>
        if (sampleId == fatherId) Logging.addError(s"Father for $sampleId can not be itself")
        samples.get(fatherId) match {
          case Some(fatherSample) =>
            if (fatherSample.gender != Gender.Male)
              Logging.addError(s"Father of $sampleId is not a male")
          case None =>
            logger.warn(s"For sample '$sampleId' is father '$fatherId' not found in config")
        }
      }
      g
    }

    /**
     * Sample id of the mother, read from the "mother" config value of the sample.
     *
     * On first access the value is validated: an error is registered when the sample names
     * itself as mother or when the referenced sample is not female; a warning is logged when
     * the referenced sample is not present in `samples`.
     */
    lazy val mother: Option[String] = {
      val g: Option[String] = config("mother", sample = sampleId, library = null)
      g.foreach { motherId =>
        if (sampleId == motherId) Logging.addError(s"mother for $sampleId can not be itself")
        samples.get(motherId) match {
          case Some(motherSample) =>
            if (motherSample.gender != Gender.Female)
              Logging.addError(s"Mother of $sampleId is not a female")
          case None =>
            logger.warn(s"For sample '$sampleId' is mother '$motherId' not found in config")
        }
      }
      g
    }

    lazy val groups: List[String] = config("groups", sample = sampleId, library = null)

Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
123
124
    /**
     * Factory method for Library class
     * @param id LibraryId
     * @return Library class
     */
Peter van 't Hof's avatar
Peter van 't Hof committed
125
    def makeLibrary(id: String): Library
Peter van 't Hof's avatar
Peter van 't Hof committed
126

Peter van 't Hof's avatar
Peter van 't Hof committed
127
128
    /**
     * Returns the library ids of this sample: the keys found under
     * samples -> sampleId -> libraries in the global config, or an empty set when that
     * path is absent.
     */
    protected def libIds: Set[String] = {
      val libraryPath = List("samples", sampleId, "libraries")
      val libraryMap = ConfigUtils.getMapFromPath(globalConfig.map, libraryPath).getOrElse(Map())
      libraryMap.keySet
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
131

Peter van 't Hof's avatar
Peter van 't Hof committed
132
    /**
     * Registers a summarizable on the qscript, tagged with this sample.
     * The given name overrules the one from the qscript.
     */
    def addSummarizable(summarizable: Summarizable, name: String): Unit = {
      val sampleTag = Some(sampleId)
      qscript.addSummarizable(summarizable, name, sampleTag)
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
137
    /**
     * Adds the sample jobs.
     *
     * Reports an error through [[Logging]] when the sample id does not match `nameRegex`,
     * marks this sample as the current one while `addJobs()` runs, registers this sample
     * as the "pipeline" summarizable and finally clears the current sample.
     */
    final def addAndTrackJobs(): Unit = {
      val idIsValid = nameRegex.findFirstIn(sampleId).isDefined
      if (!idIsValid) {
        Logging.addError(s"Sample '$sampleId' $nameError")
      }

      currentSample = Some(sampleId)
      addJobs()
      qscript.addSummarizable(this, "pipeline", Some(sampleId))
      currentSample = None
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
146

Peter van 't Hof's avatar
Peter van 't Hof committed
147
    /** Adds the jobs for this sample; called by addAndTrackJobs(). */
    protected def addJobs(): Unit
Peter van 't Hof's avatar
Peter van 't Hof committed
149

Peter van 't Hof's avatar
Peter van 't Hof committed
150
    /** Runs addAndTrackJobs() on every library of this sample in one call. */
    protected final def addPerLibJobs(): Unit = {
      libraries.foreach { case (_, library) => library.addAndTrackJobs() }
    }

157
    /** Creates a file in the sample directory named "<sampleId><suffix>". */
    def createFile(suffix: String): File = {
      val fileName = s"$sampleId$suffix"
      new File(sampleDir, fileName)
    }

    /** Returns the sample directory: "samples/<sampleId>" below the output directory. */
    def sampleDir: File = {
      val relativePath = s"samples${File.separator}$sampleId"
      new File(outputDir, relativePath)
    }
162
163
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
164
  /** Sample type, need implementation in pipeline */
Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
  type Sample <: AbstractSample

Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
  /**
   * Factory method for Sample class
   * @param id SampleId
   * @return Sample class
   */
172
  def makeSample(id: String): Sample
Peter van 't Hof's avatar
Peter van 't Hof committed
173

Peter van 't Hof's avatar
Peter van 't Hof committed
174
175
  /** All samples, keyed by sample id; built with makeSample. */
  val samples: Map[String, Sample] =
    (for (id <- sampleIds) yield (id, makeSample(id))).toMap
176

177
  /** Returns the set of all sample ids: the keys of the "samples" entry in the global config. */
  protected def sampleIds: Set[String] = {
    val sampleSection = globalConfig.map("samples")
    ConfigUtils.any2map(sampleSection).keySet
  }
179

Peter van 't Hof's avatar
Peter van 't Hof committed
180
  protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
Peter van 't Hof's avatar
Peter van 't Hof committed
181
182
183
184
  /**
   * Message suffix appended to "Sample '<id>'" / "Library '<id>'" when an id does not match
   * `nameRegex`. The leading space is kept so existing concatenations stay unchanged.
   */
  protected lazy val nameError = " name invalid. " +
    "Name must have at least 3 characters, " +
    "must begin and end with an alphanumeric character, " +
    "and must not have whitespace."
Peter van 't Hof's avatar
Peter van 't Hof committed
185

Peter van 't Hof's avatar
Peter van 't Hof committed
186
  /**
   * Runs addAndTrackJobs for the selected samples.
   *
   * When `onlySamples` is empty, or every configured sample is listed in it, all samples are
   * run followed by addMultiSampleJobs(). Otherwise only the listed samples are run, a
   * warning is logged for each unknown id, and the multi-sample jobs are not added.
   */
  final def addSamplesJobs(): Unit = {
    val runEverything =
      onlySamples.isEmpty || samples.forall { case (id, _) => onlySamples.contains(id) }

    if (runEverything) {
      for ((_, sample) <- samples) sample.addAndTrackJobs()
      addMultiSampleJobs()
    } else {
      for (sampleId <- onlySamples) {
        samples.get(sampleId) match {
          case Some(sample) => sample.addAndTrackJobs()
          case None         => logger.warn("sampleId '" + sampleId + "' not found")
        }
      }
    }
  }

197
  /**
   * Method where the multisample jobs should be added. It is only executed when all samples are
   * run, i.e. when the -sample argument is not given or lists every sample in the config.
   */
  def addMultiSampleJobs(): Unit

Peter van 't Hof's avatar
Peter van 't Hof committed
202
  /** Stores sample state */
203
204
  private var currentSample: Option[String] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
205
206
207
208
  /** Stores library state */
  private var currentLib: Option[String] = None

  /**
   * Prefixes the full config path with "samples" / sample id and "libraries" / library id
   * for jobs that are created while a current sample and/or library is set.
   */
  override def configFullPath: List[String] = {
    val samplePrefix = currentSample.toList.flatMap(s => List("samples", s))
    val libraryPrefix = currentLib.toList.flatMap(l => List("libraries", l))
    samplePrefix ::: libraryPrefix ::: super.configFullPath
  }
220
}
221
222
223
224
225
226
227

/** Companion object holding the shared [[Gender]] enumeration. */
object MultiSampleQScript {

  /** Possible genders of a sample. */
  object Gender extends Enumeration {
    val Male = Value
    val Female = Value
    val Unknown = Value
  }

}