Commit 0c3deab9 authored by Peter van 't Hof
Browse files

Added summary framework to mapping and flexiprep

parent 2952190e
......@@ -45,8 +45,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
def makeLibrary(id: String) = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
val mapping = new Mapping(qscript)
mapping.sampleId = sampleId
mapping.libId = libId
mapping.sampleId = Some(sampleId)
mapping.libId = Some(libId)
mapping.outputDir = libDir
/** Library variantcalling */
......
......@@ -52,8 +52,8 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
if (config.contains("R1")) {
mapping.input_R1 = config("R1")
if (config.contains("R2")) mapping.input_R2 = config("R2")
mapping.libId = libId
mapping.sampleId = sampleId
mapping.libId = Some(libId)
mapping.sampleId = Some(sampleId)
mapping.outputDir = libDir
mapping.init
......
......@@ -19,12 +19,12 @@ import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat }
import nl.lumc.sasc.biopet.tools.FastqSync
class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript {
class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
def this() = this(null)
@Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
......@@ -41,15 +41,11 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with
// TODO: hide sampleId and libId from the command line so they do not interfere with our config values
/** Sample name */
@Argument(doc = "Sample ID", shortName = "sample", required = true)
var sampleId: String = _
def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
/** Library name */
@Argument(doc = "Library ID", shortName = "library", required = true)
var libId: String = _
def summaryFiles = Map()
def summaryFile = new File(outputDir, sampleId + "-" + libId + ".qc.summary.json")
def summaryData = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip)
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
......@@ -67,8 +63,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with
def init() {
require(outputDir != null, "Missing output directory on flexiprep module")
require(input_R1 != null, "Missing input R1 on flexiprep module")
require(sampleId != null, "Missing sample ID on flexiprep module")
require(libId != null, "Missing library ID on flexiprep module")
//require(sampleId != null, "Missing sample ID on flexiprep module")
//require(libId != null, "Missing library ID on flexiprep module")
paired = input_R2.isDefined
......@@ -90,7 +86,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with
case _ =>
}
summary.out = new File(outputDir, sampleId + "-" + libId + ".qc.summary.json")
summary.out = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json")
}
def biopetScript() {
......@@ -267,7 +263,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with
summary.addMd5sum(md5sum_R2, R2 = true, after = true)
}
fastqc_R1_after = Fastqc(this, R1, new File(outputDir, R1_name + ".qc.fastqc/"))
addSummarizable(fastqc_R1_after)
addSummarizable(fastqc_R1_after, "fastqc_R1_qc")
add(fastqc_R1_after)
summary.addFastqc(fastqc_R1_after, after = true)
if (paired) {
......
......@@ -120,8 +120,8 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
logger.debug("Start")
md5Summary()
val summary =
("samples" := ( flexiprep.sampleId :=
("libraries" := ( flexiprep.libId := (
("samples" := ( flexiprep.sampleId.getOrElse("x") :=
("libraries" := ( flexiprep.libId.getOrElse("x") := (
("flexiprep" := (
("clipping" := !flexiprep.skipClip) ->:
("trimming" := !flexiprep.skipTrim) ->:
......
......@@ -51,8 +51,8 @@ class FlexiprepTest extends TestNGSuite with Matchers {
flexiprep.input_R1 = new File(flexiprep.outputDir, "bla_R1.fq" + (if (zipped) ".gz" else ""))
if (paired) flexiprep.input_R2 = Some(new File(flexiprep.outputDir, "bla_R2.fq" + (if (zipped) ".gz" else "")))
flexiprep.sampleId = "1"
flexiprep.libId = "1"
flexiprep.sampleId = Some("1")
flexiprep.libId = Some("1")
flexiprep.script()
flexiprep.functions.count(_.isInstanceOf[Fastqc]) shouldBe (
......
......@@ -18,7 +18,8 @@ package nl.lumc.sasc.biopet.pipelines.mapping
import nl.lumc.sasc.biopet.core.config.Configurable
import java.io.File
import java.util.Date
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.{ Ln, Star, Stampy, Bowtie }
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem }
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
......@@ -30,7 +31,7 @@ import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, ClassType }
import scala.math._
class Mapping(val root: Configurable) extends QScript with BiopetQScript {
class Mapping(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
def this() = this(null)
@Input(doc = "R1 fastq file", shortName = "R1", required = true)
......@@ -69,14 +70,6 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
// TODO: hide sampleId and libId from the command line so they do not interfere with our config values
/** Readgroup Library */
@Argument(doc = "Library ID", shortName = "library", required = true)
var libId: String = _
/**Readgroup sample */
@Argument(doc = "Sample ID", shortName = "sample", required = true)
var sampleId: String = _
/** Readgroup Platform */
protected var platform: String = config("platform", default = "illumina")
......@@ -99,15 +92,21 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
val flexiprep = new Flexiprep(this)
/** Final BAM file of this mapping run: `<outputName>.final.bam` inside `outputDir`. */
def finalBamFile: File = {
  val bamName = outputName + ".final.bam"
  new File(outputDir, bamName)
}

/** Summary JSON file inside `outputDir`; an unset sample or library id is written as "x". */
def summaryFile = {
  val sampleTag = sampleId.getOrElse("x")
  val libraryTag = libId.getOrElse("x")
  new File(outputDir, sampleTag + "-" + libraryTag + ".summary.json")
}

/** Empty map: this pipeline registers no summary files of its own here. */
def summaryFiles = Map()

/** Empty map: this pipeline registers no summary data of its own here. */
def summaryData = Map()
def init() {
require(outputDir != null, "Missing output directory on mapping module")
// Fail early when no R1 fastq was supplied; the message names the input that is actually missing.
require(input_R1 != null, "Missing input R1 on mapping module")
require(sampleId != null, "Missing sample ID on mapping module")
require(libId != null, "Missing library ID on mapping module")
require(sampleId.isDefined, "Missing sample ID on mapping module")
require(libId.isDefined, "Missing library ID on mapping module")
paired = input_R2.isDefined
if (readgroupId == null && sampleId != null && libId != null) readgroupId = sampleId + "-" + libId
if (readgroupId == null) readgroupId = sampleId.get + "-" + libId.get
else if (readgroupId == null) readgroupId = config("readgroup_id")
if (outputName == null) outputName = readgroupId
......@@ -203,6 +202,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
if (!skipFlexiprep) {
flexiprep.runFinalize(fastq_R1_output, fastq_R2_output)
addAll(flexiprep.functions) // Add the functions of flexiprep to the current function pool
addSummaryQScript(flexiprep)
}
var bamFile = bamFiles.head
......@@ -223,6 +223,8 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
if (config("generate_wig", default = false).asBoolean)
addAll(Bam2Wig(this, finalBamFile).functions)
addSummaryJobs
}
def addBwaAln(R1: File, R2: File, output: File, deps: List[File]): File = {
......@@ -246,7 +248,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
bwaSampe.fastqR2 = R2
bwaSampe.saiR1 = bwaAlnR1.output
bwaSampe.saiR2 = bwaAlnR2.output
bwaSampe.r = getReadGroup
bwaSampe.r = getReadGroupBwa
bwaSampe.output = swapExt(output.getParent, output, ".bam", ".sam")
bwaSampe.isIntermediate = true
add(bwaSampe)
......@@ -256,7 +258,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
val bwaSamse = new BwaSamse(this)
bwaSamse.fastq = R1
bwaSamse.sai = bwaAlnR1.output
bwaSamse.r = getReadGroup
bwaSamse.r = getReadGroupBwa
bwaSamse.output = swapExt(output.getParent, output, ".bam", ".sam")
bwaSamse.isIntermediate = true
add(bwaSamse)
......@@ -275,7 +277,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
bwaCommand.R1 = R1
if (paired) bwaCommand.R2 = R2
bwaCommand.deps = deps
bwaCommand.R = Some(getReadGroup)
bwaCommand.R = Some(getReadGroupBwa)
bwaCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
bwaCommand.isIntermediate = true
add(bwaCommand)
......@@ -288,8 +290,8 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
def addStampy(R1: File, R2: File, output: File, deps: List[File]): File = {
var RG: String = "ID:" + readgroupId + ","
RG += "SM:" + sampleId + ","
RG += "LB:" + libId + ","
RG += "SM:" + sampleId.get + ","
RG += "LB:" + libId.get + ","
// readgroupDescription is an Option: unwrap it instead of concatenating the Option itself,
// and emit the tag as "DS:<value>" like the other comma-separated read group fields.
readgroupDescription.foreach(description => RG += "DS:" + description + ",")
RG += "PU:" + platformUnit + ","
if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
......@@ -340,10 +342,10 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
addOrReplaceReadGroups.createIndex = true
addOrReplaceReadGroups.RGID = readgroupId
addOrReplaceReadGroups.RGLB = libId
addOrReplaceReadGroups.RGLB = libId.get
addOrReplaceReadGroups.RGPL = platform
addOrReplaceReadGroups.RGPU = platformUnit
addOrReplaceReadGroups.RGSM = sampleId
addOrReplaceReadGroups.RGSM = sampleId.get
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
......@@ -352,12 +354,12 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
return addOrReplaceReadGroups.output
}
def getReadGroup(): String = {
def getReadGroupBwa(): String = {
var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
RG += "LB:" + libId + "\\t"
RG += "LB:" + libId.get + "\\t"
RG += "PL:" + platform + "\\t"
RG += "PU:" + platformUnit + "\\t"
RG += "SM:" + sampleId + "\\t"
RG += "SM:" + sampleId.get + "\\t"
if (readgroupSequencingCenter.isDefined) RG += "CN:" + readgroupSequencingCenter.get + "\\t"
// @RG fields are written as TAG:value; DS and DT need the colon just like ID/LB/PL/PU/SM/CN.
if (readgroupDescription.isDefined) RG += "DS:" + readgroupDescription.get + "\\t"
if (readgroupDate != null) RG += "DT:" + readgroupDate + "\\t"
......
......@@ -59,8 +59,8 @@ class MappingTest extends TestNGSuite with Matchers {
mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq")
if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq"))
mapping.sampleId = "1"
mapping.libId = "1"
mapping.sampleId = Some("1")
mapping.libId = Some("1")
mapping.script()
//Flexiprep
......
......@@ -66,12 +66,12 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
val prefixFastq: File = createFile(".prefix.fastq")
val flexiprep = new Flexiprep(qscript)
flexiprep.sampleId = sampleId
flexiprep.libId = libId
flexiprep.sampleId = Some(sampleId)
flexiprep.libId = Some(libId)
val mapping = new Mapping(qscript)
mapping.libId = libId
mapping.sampleId = sampleId
mapping.libId = Some(libId)
mapping.sampleId = Some(sampleId)
protected def addJobs(): Unit = {
flexiprep.outputDir = new File(libDir, "flexiprep/")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment