Yamsvp.scala 4.66 KB
Newer Older
wyleung's avatar
wyleung committed
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * Structural variation calling
 */

package nl.lumc.sasc.biopet.pipelines.yamsvp

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand

import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
13
import nl.lumc.sasc.biopet.extensions.svcallers.Breakdancer
14
import nl.lumc.sasc.biopet.extensions.svcallers.Clever
wyleung's avatar
wyleung committed
15
16
17
18
19
20
21
22
23
24
25

import nl.lumc.sasc.biopet.pipelines.mapping.Mapping

import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._

class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
  def this() = this(null)

  var reference: File = _
  var finalBamFiles: List[File] = Nil
26
27

  override def init() {
wyleung's avatar
wyleung committed
28
29
30
31
32
33
34
35
36
37
38
39
40
    for (file <- configfiles) globalConfig.loadConfigFile(file)
    reference = config("reference", required = true)
    if (outputDir == null)
      throw new IllegalStateException("Output directory is not specified in the config / argument")
    else if (!outputDir.endsWith("/"))
      outputDir += "/"
  }

  def biopetScript() {
    // write the pipeline here
    // start with QC, alignment, call sambamba, call sv callers, reporting

    // read config and set all parameters for the pipeline
41
    logger.info("Starting YAM SV Pipeline")
wyleung's avatar
wyleung committed
42
    runSamplesJobs
43
    // 
wyleung's avatar
wyleung committed
44
45

  }
46

wyleung's avatar
wyleung committed
47
48
49
50
51
  def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] = {
    var outputFiles: Map[String, List[File]] = Map()
    var libraryBamfiles: List[File] = List()
    var libraryFastqFiles: List[File] = List()
    val sampleID: String = sampleConfig("ID").toString
52
53
54
55
    val sampleDir: String = outputDir + sampleID + "/"

    val svcallingDir: String = sampleDir + "svcalls/"

wyleung's avatar
wyleung committed
56
57
58
    for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) {
      libraryBamfiles +:= libraryFiles("FinalBam")
    }
59

wyleung's avatar
wyleung committed
60
    val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head
61
62
63
64
65
    else if (libraryBamfiles.size > 1) {
      val mergeSamFiles = MergeSamFiles(this, libraryBamfiles, sampleDir)
      add(mergeSamFiles)
      mergeSamFiles.output
    } else null
wyleung's avatar
wyleung committed
66
    val fastqFile: File = if (libraryFastqFiles.size == 1) libraryFastqFiles.head
67
68
69
70
71
    else if (libraryFastqFiles.size > 1) {
      val cat = Cat.apply(this, libraryFastqFiles, sampleDir + sampleID + ".fastq")
      add(cat)
      cat.output
    } else null
72
73

    /// bamfile will be used as input for the SV callers. First run Clever
74
75
76
77
78
79
80
81
82
83
84
85
    //    val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"

    val cleverDir = svcallingDir + sampleID + ".clever/"
    val clever = Clever(this, bamFile, this.reference, cleverDir)
    outputFiles += ("clever_vcf" -> List(clever.outputvcf))
    add(clever)

    //    val breakdancerDir = sampleDir + sampleID + ".breakdancer/"
    //    val breakdancer = Breakdancer(this, bamFile, this.reference, breakdancerDir )
    //    outputFiles += ("breakdancer_vcf" -> List(breakdancer.output) )
    //    addAll(breakdancer.functions)

wyleung's avatar
wyleung committed
86
87
    return outputFiles
  }
88

wyleung's avatar
wyleung committed
89
90
91
92
93
  // Called for each run from a sample
  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] = {
    var outputFiles: Map[String, File] = Map()
    val runID: String = runConfig("ID").toString
    val sampleID: String = sampleConfig("ID").toString
94
95
96
    val alignmentDir: String = outputDir + sampleID + "/alignment/"

    val runDir: String = alignmentDir + "run_" + runID + "/"
wyleung's avatar
wyleung committed
97
    if (runConfig.contains("R1")) {
98
99
      val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
      mapping.skipFlexiprep = false
100
101
102
103
104
105
      mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
      mapping.defaultAligner = "stampy"
      mapping.aligner = "stampy" // enfore stampy to be the aligner

      //      mapping.chunking = true // align in chunks
      //      mapping.numberChunks = 4 // could be a fixed value if taken from config
wyleung's avatar
wyleung committed
106
107
108
109
110
111
112
113
114
      mapping.RGLB = runConfig("ID").toString
      mapping.RGSM = sampleConfig("ID").toString
      if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
      if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
      if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
      mapping.outputDir = runDir
      mapping.init
      mapping.biopetScript
      addAll(mapping.functions)
115

wyleung's avatar
wyleung committed
116
117
      outputFiles += ("FinalBam" -> mapping.outputFiles("finalBamFile"))
    } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
118
    logger.debug(outputFiles)
wyleung's avatar
wyleung committed
119
120
121
122
123
124
125
    return outputFiles
  }
}

object Yamsvp extends PipelineCommand {
  override val pipeline = "/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.class"
}