Skip to content
Snippets Groups Projects
Commit eac4cd7b authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added mapping as separated artifact

parent 13430b2e
No related branches found
No related tags found
No related merge requests found
/target/
\ No newline at end of file
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId>
<version>0.2.0-DEV</version>
<packaging>jar</packaging>
<inceptionYear>2014</inceptionYear>
<name>Mapping</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<sting.unpack.phase>prepare-package</sting.unpack.phase>
<sting.shade.phase>package</sting.shade.phase>
<app.main.class>nl.lumc.sasc.biopet.core.BiopetExecutable</app.main.class>
</properties>
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetFramework</artifactId>
<version>0.2.0-DEV</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Flexiprep</artifactId>
<version>0.2.0-DEV</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<includes>
<include>**/*</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>scala-compile</id>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.5</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<showDeprecation>true</showDeprecation>
</configuration>
</plugin>
</plugins>
</build>
</project>
package nl.lumc.sasc.biopet.pipelines.mapping
import nl.lumc.sasc.biopet.core.config.Configurable
import java.io.File
import java.util.Date
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.tools.FastqSplitter
import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star, Bowtie, Stampy }
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups }
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
import scala.math._
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
qscript =>
def this() = this(null)
@Input(doc = "R1 fastq file", shortName = "R1", required = true)
var input_R1: File = _
@Input(doc = "R2 fastq file", shortName = "R2", required = false)
var input_R2: File = _
@Argument(doc = "Output name", shortName = "outputName", required = false)
var outputName: String = _
@Argument(doc = "Skip flexiprep", shortName = "skipflexiprep", required = false)
var skipFlexiprep: Boolean = false
@Argument(doc = "Skip mark duplicates", shortName = "skipmarkduplicates", required = false)
var skipMarkduplicates: Boolean = false
@Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false)
var skipMetrics: Boolean = false
@Argument(doc = "Aligner", shortName = "ALN", required = false)
var aligner: String = config("aligner", default = "bwa")
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = config("reference")
@Argument(doc = "Chunking", shortName = "chunking", required = false)
var chunking: Boolean = config("chunking", false)
@ClassType(classOf[Int])
@Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
var numberChunks: Option[Int] = None
// Readgroup items
@Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
var RGID: String = config("RGID")
@Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
var RGLB: String = config("RGLB")
@Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
var RGPL: String = config("RGPL", default = "illumina")
@Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
var RGPU: String = config("RGPU", default = "na")
@Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
var RGSM: String = config("RGSM")
@Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
var RGCN: String = config("RGCN")
@Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
var RGDS: String = config("RGDS")
@Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
var RGDT: Date = _
@Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
var RGPI: Int = config("RGPI")
var paired: Boolean = false
val flexiprep = new Flexiprep(this)
def init() {
if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
else if (!outputDir.endsWith("/")) outputDir += "/"
if (input_R1 == null) throw new IllegalStateException("Missing FastQ R1 on mapping module")
paired = (input_R2 != null)
if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")
if (outputName == null) outputName = RGID
if (chunking) {
if (numberChunks.isEmpty) {
if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
else {
val chunkSize: Int = config("chunksize", (1 << 30))
val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3
else input_R1.length
numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt)
}
}
logger.debug("Chunks: " + numberChunks.getOrElse(1))
}
}
def biopetScript() {
var fastq_R1: File = input_R1
var fastq_R2: File = if (paired) input_R2 else ""
if (!skipFlexiprep) {
flexiprep.outputDir = outputDir + "flexiprep/"
flexiprep.input_R1 = fastq_R1
if (paired) flexiprep.input_R2 = fastq_R2
flexiprep.sampleName = this.RGSM
flexiprep.libraryName = this.RGLB
flexiprep.init
flexiprep.runInitialJobs
}
var bamFiles: List[File] = Nil
var fastq_R1_output: List[File] = Nil
var fastq_R2_output: List[File] = Nil
def removeGz(file: String): String = {
if (file.endsWith(".gz")) return file.substring(0, file.lastIndexOf(".gz"))
else if (file.endsWith(".gzip")) return file.substring(0, file.lastIndexOf(".gzip"))
else return file
}
var chunks: Map[String, (String, String)] = Map()
if (chunking) for (t <- 1 to numberChunks.getOrElse(1)) {
val chunkDir = outputDir + "chunks/" + t + "/"
chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName),
if (paired) removeGz(chunkDir + fastq_R2.getName) else ""))
}
else chunks += (outputDir -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir),
flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir)))
if (chunking) {
val fastSplitter_R1 = new FastqSplitter(this)
fastSplitter_R1.input = fastq_R1
for ((chunkDir, fastqfile) <- chunks) fastSplitter_R1.output :+= fastqfile._1
add(fastSplitter_R1, isIntermediate = true)
if (paired) {
val fastSplitter_R2 = new FastqSplitter(this)
fastSplitter_R2.input = fastq_R2
for ((chunkDir, fastqfile) <- chunks) fastSplitter_R2.output :+= fastqfile._2
add(fastSplitter_R2, isIntermediate = true)
}
}
for ((chunkDir, fastqfile) <- chunks) {
var R1 = fastqfile._1
var R2 = fastqfile._2
var deps: List[File] = Nil
if (!skipFlexiprep) {
val flexiout = flexiprep.runTrimClip(R1, R2, chunkDir + "flexiprep/", chunkDir)
logger.debug(chunkDir + " - " + flexiout)
R1 = flexiout._1
if (paired) R2 = flexiout._2
deps = flexiout._3
fastq_R1_output :+= R1
fastq_R2_output :+= R2
}
val outputBam = new File(chunkDir + outputName + ".bam")
bamFiles :+= outputBam
aligner match {
case "bwa" => addBwa(R1, R2, outputBam, deps)
case "bowtie" => addBowtie(R1, R2, outputBam, deps)
case "stampy" => addStampy(R1, R2, outputBam, deps)
case "star" => addStar(R1, R2, outputBam, deps)
case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
case _ => throw new IllegalStateException("Option Aligner: '" + aligner + "' is not valid")
}
if (config("chunk_metrics", default = false))
addAll(BamMetrics(this, outputBam, chunkDir + "metrics/").functions)
}
if (!skipFlexiprep) {
flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
addAll(flexiprep.functions) // Add function of flexiprep to curent function pool
}
var bamFile = bamFiles.head
if (!skipMarkduplicates) {
bamFile = new File(outputDir + outputName + ".dedup.bam")
add(MarkDuplicates(this, bamFiles, bamFile))
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir)
add(mergeSamFile)
bamFile = mergeSamFile.output
}
if (!skipMetrics) addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions)
outputFiles += ("finalBamFile" -> bamFile)
}
def addBwa(R1: File, R2: File, output: File, deps: List[File]): File = {
val bwaCommand = new Bwa(this)
bwaCommand.R1 = R1
if (paired) bwaCommand.R2 = R2
bwaCommand.deps = deps
bwaCommand.R = getReadGroup
bwaCommand.output = this.swapExt(output.getParent, output, ".bam", ".sam")
add(bwaCommand, isIntermediate = true)
val sortSam = SortSam(this, bwaCommand.output, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
add(sortSam)
return sortSam.output
}
def addStampy(R1: File, R2: File, output: File, deps: List[File]): File = {
var RG: String = "ID:" + RGID + ","
RG += "SM:" + RGSM + ","
RG += "LB:" + RGLB + ","
if (RGDS != null) RG += "DS" + RGDS + ","
RG += "PU:" + RGPU + ","
if (RGPI > 0) RG += "PI:" + RGPI + ","
if (RGCN != null) RG += "CN:" + RGCN + ","
if (RGDT != null) RG += "DT:" + RGDT + ","
RG += "PL:" + RGPL
val stampyCmd = new Stampy(this)
stampyCmd.R1 = R1
if (paired) stampyCmd.R2 = R2
stampyCmd.deps = deps
stampyCmd.readgroup = RG
stampyCmd.sanger = true
stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
add(stampyCmd, isIntermediate = true)
val sortSam = SortSam(this, stampyCmd.output, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
add(sortSam)
return sortSam.output
}
def addBowtie(R1: File, R2: File, output: File, deps: List[File]): File = {
val bowtie = new Bowtie(this)
bowtie.R1 = R1
if (paired) bowtie.R2 = R2
bowtie.deps = deps
bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
add(bowtie, isIntermediate = true)
return addAddOrReplaceReadGroups(bowtie.output, output)
}
def addStar(R1: File, R2: File, output: File, deps: List[File]): File = {
val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
add(starCommand)
return addAddOrReplaceReadGroups(starCommand.outputSam, output)
}
def addStar2pass(R1: File, R2: File, output: File, deps: List[File]): File = {
val starCommand = Star._2pass(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps)
addAll(starCommand._2)
return addAddOrReplaceReadGroups(starCommand._1, output)
}
def addAddOrReplaceReadGroups(input: File, output: File): File = {
val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
addOrReplaceReadGroups.createIndex = true
addOrReplaceReadGroups.RGID = RGID
addOrReplaceReadGroups.RGLB = RGLB
addOrReplaceReadGroups.RGPL = RGPL
addOrReplaceReadGroups.RGPU = RGPU
addOrReplaceReadGroups.RGSM = RGSM
if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN
if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS
if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
add(addOrReplaceReadGroups)
return addOrReplaceReadGroups.output
}
def getReadGroup(): String = {
var RG: String = "@RG\\t" + "ID:" + RGID + "\\t"
RG += "LB:" + RGLB + "\\t"
RG += "PL:" + RGPL + "\\t"
RG += "PU:" + RGPU + "\\t"
RG += "SM:" + RGSM + "\\t"
if (RGCN != null) RG += "CN:" + RGCN + "\\t"
if (RGDS != null) RG += "DS" + RGDS + "\\t"
if (RGDT != null) RG += "DT" + RGDT + "\\t"
if (RGPI > 0) RG += "PI" + RGPI + "\\t"
return RG.substring(0, RG.lastIndexOf("\\t"))
}
}
object Mapping extends PipelineCommand {
def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any],
runDir: String, startJobs: Boolean = true): Mapping = {
val mapping = new Mapping(root)
logger.debug("Mapping runconfig: " + runConfig)
if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
mapping.paired = (mapping.input_R2 != null)
mapping.RGLB = runConfig("ID").toString
mapping.RGSM = sampleConfig("ID").toString
if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
mapping.outputDir = runDir
if (startJobs) {
mapping.init
mapping.biopetScript
}
return mapping
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment