ShivaTrait.scala 12.2 KB
Newer Older
bow's avatar
bow committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16 17 18 19 20 21
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.extensions.Ln
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
Peter van 't Hof's avatar
Peter van 't Hof committed
27

Peter van 't Hof's avatar
Peter van 't Hof committed
28 29 30
import scala.collection.JavaConversions._

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
31 32
 * This is a trait for the Shiva pipeline
 *
Peter van 't Hof's avatar
Peter van 't Hof committed
33 34
 * Created by pjvan_thof on 2/26/15.
 */
35
trait ShivaTrait extends MultiSampleQScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  /** Executed before running the script */
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
40 41
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  /** Method to add jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
44 45
    addSamplesJobs()

Peter van 't Hof's avatar
Peter van 't Hof committed
46
    addSummaryJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
47 48
  }

49 50 51 52 53 54 55
  override def reportClass = {
    val shiva = new ShivaReport(this)
    shiva.outputDir = new File(outputDir, "report")
    shiva.summaryFile = summaryFile
    Some(shiva)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
56
  /** Method to make the variantcalling submodule of shiva */
57
  def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
Peter van 't Hof's avatar
Peter van 't Hof committed
58
    if (multisample) new ShivaVariantcalling(qscript) {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
      override def namePrefix = "multisample"
Peter van 't Hof's avatar
Peter van 't Hof committed
60 61 62 63 64 65 66 67
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
68
  /** Method to make a sample */
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def makeSample(id: String) = new Sample(id)
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71 72 73 74 75
  /** Class that will generate jobs for a sample */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /** Sample specific files to add to summary */
    def summaryFiles: Map[String, File] = {
      preProcessBam match {
76 77
        case Some(b) => Map("preProcessBam" -> b)
        case _       => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
78 79
      }
    }
80

Peter van 't Hof's avatar
Peter van 't Hof committed
81
    /** Sample specific stats to add to summary */
82 83
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
84
    /** Method to make a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
85 86
    def makeLibrary(id: String) = new Library(id)

Peter van 't Hof's avatar
Peter van 't Hof committed
87
    /** Class to generate jobs for a library */
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90 91
      /** Library specific files to add to the summary */
      def summaryFiles: Map[String, File] = {
        (bamFile, preProcessBam) match {
92 93 94
          case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb)
          case (Some(b), _)        => Map("bamFile" -> b)
          case _                   => Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
95 96
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
      /** Library specific stats to add to summary */
99 100
      def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
101
      /** Method to execute library preprocess */
Peter van 't Hof's avatar
Peter van 't Hof committed
102 103
      def preProcess(input: File): Option[File] = None

Peter van 't Hof's avatar
Peter van 't Hof committed
104
      /** Method to make the mapping submodule */
Peter van 't Hof's avatar
Peter van 't Hof committed
105 106 107 108 109
      def makeMapping = {
        val mapping = new Mapping(qscript)
        mapping.sampleId = Some(sampleId)
        mapping.libId = Some(libId)
        mapping.outputDir = libDir
110
        mapping.outputName = sampleId + "-" + libId
Peter van 't Hof's avatar
Peter van 't Hof committed
111 112 113 114 115 116 117 118 119
        (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile))
      }

      lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) =
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => makeMapping // Default starting from fastq files
          case (false, true) => // Starting from bam file
            config("bam_to_fastq", default = false).asBoolean match {
              case true => makeMapping // bam file will be converted to fastq
120
              case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
121 122 123 124 125 126
                val file = new File(libDir, sampleId + "-" + libId + ".final.bam")
                (None, Some(file), preProcess(file))
            }
          case _ => (None, None, None)
        }

127
      lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean &&
Peter van 't Hof's avatar
Peter van 't Hof committed
128 129 130 131
        (bamFile.isDefined || preProcessBam.isDefined)) {
        Some(makeVariantcalling(multisample = false))
      } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
132
      /** This will add jobs for this library */
Peter van 't Hof's avatar
Peter van 't Hof committed
133 134 135 136 137 138
      def addJobs(): Unit = {
        (config.contains("R1"), config.contains("bam")) match {
          case (true, _) => mapping.foreach(mapping => {
            mapping.input_R1 = config("R1")
            mapping.input_R2 = config("R2")
          })
139
          case (false, true) => config("bam_to_fastq", default = false).asBoolean match {
140
            case true =>
Peter van 't Hof's avatar
Peter van 't Hof committed
141 142 143 144 145 146 147 148 149
              val samToFastq = SamToFastq(qscript, config("bam"),
                new File(libDir, sampleId + "-" + libId + ".R1.fastq"),
                new File(libDir, sampleId + "-" + libId + ".R2.fastq"))
              samToFastq.isIntermediate = true
              qscript.add(samToFastq)
              mapping.foreach(mapping => {
                mapping.input_R1 = samToFastq.fastqR1
                mapping.input_R2 = Some(samToFastq.fastqR2)
              })
150
            case false =>
Peter van 't Hof's avatar
Peter van 't Hof committed
151 152 153
              val inputSam = SamReaderFactory.makeDefault.open(config("bam"))
              val readGroups = inputSam.getFileHeader.getReadGroups

154
              val readGroupOke = readGroups.forall(readGroup => {
Peter van 't Hof's avatar
Peter van 't Hof committed
155 156 157 158
                if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
                if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
                readGroup.getSample == sampleId && readGroup.getLibrary == libId
              })
159
              inputSam.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179

              if (!readGroupOke) {
                if (config("correct_readgroups", default = false).asBoolean) {
                  logger.info("Correcting readgroups, file:" + config("bam"))
                  val aorrg = AddOrReplaceReadGroups(qscript, config("bam"), bamFile.get)
                  aorrg.RGID = sampleId + "-" + libId
                  aorrg.RGLB = libId
                  aorrg.RGSM = sampleId
                  aorrg.isIntermediate = true
                  qscript.add(aorrg)
                } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
                  "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
              } else {
                val oldBamFile: File = config("bam")
                val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
                val newIndex: File = new File(libDir, oldBamFile.getName.stripSuffix(".bam") + ".bai")
                val baiLn = Ln(qscript, oldIndex, newIndex)
                add(baiLn)

                val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bow's avatar
bow committed
180
                bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
181 182 183 184 185 186 187
                add(bamLn)
              }
          }
          case _ => logger.warn("Sample: " + sampleId + "  Library: " + libId + ", no reads found")
        }

        mapping.foreach(mapping => {
188 189
          mapping.init()
          mapping.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
190 191 192 193
          addAll(mapping.functions) // Add functions of mapping to curent function pool
          addSummaryQScript(mapping)
        })

Peter van 't Hof's avatar
Peter van 't Hof committed
194
        variantcalling.foreach(vc => {
195 196
          vc.sampleId = Some(libId)
          vc.libId = Some(sampleId)
Peter van 't Hof's avatar
Peter van 't Hof committed
197 198 199
          vc.outputDir = new File(libDir, "variantcalling")
          if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil
          else vc.inputBams = bamFile.get :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
200 201
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
202
          addAll(vc.functions)
203
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
204
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
205 206 207
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
208
    /** This will add jobs for the double preprocessing */
Peter van 't Hof's avatar
Peter van 't Hof committed
209
    protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
210 211 212 213 214 215 216 217 218
      if (input == Nil) None
      else if (input.tail == Nil) {
        val bamFile = new File(sampleDir, input.head.getName)
        val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(sampleDir, input.head.getName.stripSuffix(".bam") + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

        val bamLn = Ln(qscript, input.head, bamFile)
bow's avatar
bow committed
219
        bamLn.deps :+= baiLn.output
Peter van 't Hof's avatar
Peter van 't Hof committed
220 221 222 223 224 225
        add(bamLn)
        Some(bamFile)
      } else {
        val md = new MarkDuplicates(qscript)
        md.input = input
        md.output = new File(sampleDir, sampleId + ".dedup.bam")
226
        md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
227 228
        md.isIntermediate = isIntermediate
        add(md)
229
        addSummarizable(md, "mark_duplicates")
Peter van 't Hof's avatar
Peter van 't Hof committed
230 231 232 233
        Some(md.output)
      }
    }

234
    lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => {
Peter van 't Hof's avatar
Peter van 't Hof committed
235 236 237
      (lib._2.bamFile, lib._2.preProcessBam) match {
        case (_, Some(file)) => Some(file)
        case (Some(file), _) => Some(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
238 239
        case _               => None
      }
240
    }).toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
241

Peter van 't Hof's avatar
fix bug  
Peter van 't Hof committed
242
    lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
243 244 245
      Some(makeVariantcalling(multisample = true))
    } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
246
    /** This will add sample jobs */
Peter van 't Hof's avatar
Peter van 't Hof committed
247 248 249
    def addJobs(): Unit = {
      addPerLibJobs()

250
      preProcessBam.foreach { bam =>
251
        val bamMetrics = new BamMetrics(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
252
        bamMetrics.sampleId = Some(sampleId)
253
        bamMetrics.inputBam = bam
254
        bamMetrics.outputDir = new File(sampleDir, "metrics")
255 256
        bamMetrics.init()
        bamMetrics.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
257 258 259
        addAll(bamMetrics.functions)
        addSummaryQScript(bamMetrics)

260 261 262 263 264
        val oldIndex: File = new File(bam.getAbsolutePath.stripSuffix(".bam") + ".bai")
        val newIndex: File = new File(bam + ".bai")
        val baiLn = Ln(qscript, oldIndex, newIndex)
        add(baiLn)

Peter van 't Hof's avatar
Peter van 't Hof committed
265
        variantcalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
266 267
          vc.sampleId = Some(sampleId)
          vc.outputDir = new File(sampleDir, "variantcalling")
268
          vc.inputBams = bam :: Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
269 270
          vc.init()
          vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
271 272
          addAll(vc.functions)
          addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
273
        })
Peter van 't Hof's avatar
Peter van 't Hof committed
274
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
275 276 277
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
278
  lazy val variantCalling = if (config("multisample_variantcalling", default = true).asBoolean) {
Peter van 't Hof's avatar
Peter van 't Hof committed
279 280 281
    Some(makeVariantcalling(multisample = true))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
282 283 284 285 286 287
  lazy val svCalling = if (config("sv_calling", default = false).asBoolean) {
    val svCalling = new ShivaSvCalling(this)
    samples.foreach(x => x._2.preProcessBam.foreach(bam => svCalling.addBamFile(bam, Some(x._1))))
    Some(svCalling)
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
288
  /** This will add the mutisample variantcalling */
Peter van 't Hof's avatar
Peter van 't Hof committed
289
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
290
    variantCalling.foreach(vc => {
Peter van 't Hof's avatar
Peter van 't Hof committed
291
      vc.outputDir = new File(outputDir, "variantcalling")
292
      vc.inputBams = samples.flatMap(_._2.preProcessBam).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
293 294
      vc.init()
      vc.biopetScript()
Peter van 't Hof's avatar
Peter van 't Hof committed
295
      addAll(vc.functions)
296
      addSummaryQScript(vc)
Peter van 't Hof's avatar
Peter van 't Hof committed
297
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
298 299

    svCalling.foreach(sv => {
Peter van 't Hof's avatar
Peter van 't Hof committed
300
      sv.outputDir = new File(outputDir, "sv_calling")
Peter van 't Hof's avatar
Peter van 't Hof committed
301 302 303 304 305
      sv.init()
      sv.biopetScript()
      addAll(sv.functions)
      addSummaryQScript(sv)
    })
Peter van 't Hof's avatar
Peter van 't Hof committed
306 307
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
308
  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
309 310
  def summaryFile = new File(outputDir, "Shiva.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
311
  /** Settings of pipeline for summary */
312 313 314 315 316
  def summarySettings = {
    val roiBedFiles: List[File] = config("regions_of_interest", Nil)
    val ampliconBedFile: Option[File] = config("amplicon_bed")

    Map(
317
      "reference" -> referenceSummary,
318 319 320 321
      "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")),
      "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed"))
    )
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
322

Peter van 't Hof's avatar
Peter van 't Hof committed
323
  /** Files for the summary */
324
  def summaryFiles = Map("referenceFasta" -> referenceFasta())
Peter van 't Hof's avatar
Peter van 't Hof committed
325
}