Commit cda615ea authored by Wai Yi Leung
Browse files

Merge branch 'patch-flexiprep_mapping_flags' into 'develop'

Patch flexiprep mapping flags

@wyleung and @p.j.van_t_hof

This is what we discussed earlier. The temporary solution is to expose it on the command line. I've also added some notes there and did some small refactoring. It's all in the commit messages :).

See merge request !80
parents 9bfac77b 98dfb61f
......@@ -33,7 +33,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
protected def addJobs(): Unit = {}
}
......
......@@ -42,10 +42,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
val mapping = new Mapping(qscript)
mapping.sampleId = sampleId
mapping.libraryId = libraryId
mapping.libId = libId
mapping.outputDir = libDir + "/variantcalling/"
/** Library variantcalling */
......@@ -66,8 +66,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
if (config("bam_to_fastq", default = false).asBoolean) {
val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libraryId + ".R1.fastq",
libDir + sampleId + "-" + libraryId + ".R2.fastq")
val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libId + ".R1.fastq",
libDir + sampleId + "-" + libId + ".R2.fastq")
samToFastq.isIntermediate = true
qscript.add(samToFastq)
mapping.input_R1 = samToFastq.fastqR1
......@@ -82,17 +82,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val header = inputSam.getFileHeader.getReadGroups
for (readGroup <- inputSam.getFileHeader.getReadGroups) {
if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != libraryId) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleId || readGroup.getLibrary != libraryId) readGroupOke = false
if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleId || readGroup.getLibrary != libId) readGroupOke = false
}
inputSam.close
if (!readGroupOke) {
if (config("correct_readgroups", default = false)) {
logger.info("Correcting readgroups, file:" + bamFile)
val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libraryId + ".bam"))
aorrg.RGID = sampleId + "-" + libraryId
aorrg.RGLB = libraryId
val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libId + ".bam"))
aorrg.RGID = sampleId + "-" + libId
aorrg.RGLB = libId
aorrg.RGSM = sampleId
aorrg.isIntermediate = true
qscript.add(aorrg)
......@@ -105,7 +105,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
Some(bamFile)
}
} else {
logger.error("Sample: " + sampleId + ": No R1 found for run: " + libraryId)
logger.error("Sample: " + sampleId + ": No R1 found for run: " + libId)
None
}
......
......@@ -40,26 +40,26 @@ trait MultiSampleQScript extends BiopetQScript {
/**
* Library class with basic functions built in
* @param libraryId
* @param libId
*/
abstract class AbstractLibrary(val libraryId: String) {
abstract class AbstractLibrary(val libId: String) {
/** Overrules config of qscript with default sample and default library */
val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libraryId)
val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId)
/** Adds the library jobs */
final def addAndTrackJobs(): Unit = {
currentSample = Some(sampleId)
currentLib = Some(libraryId)
currentLib = Some(libId)
addJobs()
currentLib = None
currentSample = None
}
/** Creates a library file with given suffix */
def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libraryId + suffix)
def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix)
/** Returns library directory */
def libDir = sampleDir + "lib_" + libraryId + File.separator
def libDir = sampleDir + "lib_" + libId + File.separator
/** Function that adds library jobs */
protected def addJobs()
......@@ -95,7 +95,7 @@ trait MultiSampleQScript extends BiopetQScript {
/** Adds the jobs of all libraries in one call */
protected final def addPerLibJobs(): Unit = {
for ((libraryId, library) <- libraries) {
for ((libId, library) <- libraries) {
library.addAndTrackJobs()
}
}
......
......@@ -26,7 +26,7 @@ class MultisamplePipelineTemplate(val root: Configurable) extends QScript with M
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
protected def addJobs(): Unit = {
// Library jobs
}
......
......@@ -44,14 +44,14 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
val mapping = new Mapping(qscript)
def addJobs(): Unit = {
if (config.contains("R1")) {
mapping.input_R1 = config("R1")
if (config.contains("R2")) mapping.input_R2 = config("R2")
mapping.libraryId = libraryId
mapping.libId = libId
mapping.sampleId = sampleId
mapping.outputDir = libDir
......@@ -59,7 +59,7 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
mapping.biopetScript
addAll(mapping.functions)
} else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libraryId)
} else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libId)
}
}
......
......@@ -38,13 +38,17 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
/** Skip Clip fastq files */
var skipClip: Boolean = config("skip_clip", default = false)
// TODO: hide sampleId and libId from the command line so they do not interfere with our config values
/** Sample name */
@Argument(doc = "Sample ID", shortName = "sample", required = true)
var sampleId: String = _
/** Library name */
var libraryId: String = _
@Argument(doc = "Library ID", shortName = "library", required = true)
var libId: String = _
var paired: Boolean = (input_R2 != null)
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
var R2_ext: String = _
var R1_name: String = _
......@@ -58,11 +62,12 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
val summary = new FlexiprepSummary(this)
def init() {
if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep module")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module")
if (sampleId == null) throw new IllegalStateException("Missing Sample name on flexiprep module")
if (libraryId == null) throw new IllegalStateException("Missing Library name on flexiprep module")
else if (!outputDir.endsWith("/")) outputDir += "/"
require(outputDir != null, "Missing output directory on flexiprep module")
require(input_R1 != null, "Missing input R1 on flexiprep module")
require(sampleId != null, "Missing sample ID on flexiprep module")
require(libId != null, "Missing library ID on flexiprep module")
paired = input_R2.isDefined
if (input_R1.endsWith(".gz")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gz"))
else if (input_R1.endsWith(".gzip")) R1_name = input_R1.getName.substring(0, input_R1.getName.lastIndexOf(".gzip"))
......@@ -82,7 +87,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
case _ =>
}
summary.out = outputDir + sampleId + "-" + libraryId + ".qc.summary.json"
summary.out = outputDir + sampleId + "-" + libId + ".qc.summary.json"
}
def biopetScript() {
......
......@@ -121,7 +121,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
md5Summary()
val summary =
("samples" := ( flexiprep.sampleId :=
("libraries" := ( flexiprep.libraryId := (
("libraries" := ( flexiprep.libId := (
("flexiprep" := (
("clipping" := !flexiprep.skipClip) ->:
("trimming" := !flexiprep.skipTrim) ->:
......
......@@ -36,7 +36,7 @@ class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
def addJobs(): Unit = {
}
......
......@@ -66,8 +66,15 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
/** Readgroup ID */
protected var readgroupId: String = _
// TODO: hide sampleId and libId from the command line so they do not interfere with our config values
/** Readgroup Library */
var libraryId: String = _
@Argument(doc = "Library ID", shortName = "library", required = true)
var libId: String = _
/**Readgroup sample */
@Argument(doc = "Sample ID", shortName = "sample", required = true)
var sampleId: String = _
/** Readgroup Platform */
protected var platform: String = config("platform", default = "illumina")
......@@ -75,9 +82,6 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
/** Readgroup platform unit */
protected var platformUnit: String = config("platform_unit", default = "na")
/**Readgroup sample */
var sampleId: String = _
/** Readgroup sequencing center */
protected var readgroupSequencingCenter: Option[String] = config("readgroup_sequencing_center")
......@@ -95,14 +99,14 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
def finalBamFile: File = outputDir + outputName + ".final.bam"
def init() {
if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
else if (!outputDir.endsWith("/")) outputDir += "/"
if (input_R1 == null) throw new IllegalStateException("Missing FastQ R1 on mapping module")
require(outputDir != null, "Missing output directory on mapping module")
// R1 is mandatory; R2 is optional and only decides whether the run is paired
require(input_R1 != null, "Missing input R1 on mapping module")
require(sampleId != null, "Missing sample ID on mapping module")
require(libId != null, "Missing library ID on mapping module")
paired = input_R2.isDefined
if (libraryId == null) libraryId = config("library_id")
if (sampleId == null) sampleId = config("sample_id")
if (readgroupId == null && sampleId != null && libraryId != null) readgroupId = sampleId + "-" + libraryId
if (readgroupId == null && sampleId != null && libId != null) readgroupId = sampleId + "-" + libId
else if (readgroupId == null) readgroupId = config("readgroup_id")
if (outputName == null) outputName = readgroupId
......@@ -127,7 +131,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
flexiprep.input_R1 = input_R1
flexiprep.input_R2 = input_R2
flexiprep.sampleId = this.sampleId
flexiprep.libraryId = this.libraryId
flexiprep.libId = this.libId
flexiprep.init
flexiprep.runInitialJobs
}
......@@ -281,7 +285,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
var RG: String = "ID:" + readgroupId + ","
RG += "SM:" + sampleId + ","
RG += "LB:" + libraryId + ","
RG += "LB:" + libId + ","
// The description is optional; when present it uses the same "TAG:value," layout as the other fields
if (readgroupDescription != null) RG += "DS:" + readgroupDescription + ","
RG += "PU:" + platformUnit + ","
if (predictedInsertsize.getOrElse(0) > 0) RG += "PI:" + predictedInsertsize.get + ","
......@@ -332,7 +336,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
addOrReplaceReadGroups.createIndex = true
addOrReplaceReadGroups.RGID = readgroupId
addOrReplaceReadGroups.RGLB = libraryId
addOrReplaceReadGroups.RGLB = libId
addOrReplaceReadGroups.RGPL = platform
addOrReplaceReadGroups.RGPU = platformUnit
addOrReplaceReadGroups.RGSM = sampleId
......@@ -346,7 +350,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
def getReadGroup(): String = {
var RG: String = "@RG\\t" + "ID:" + readgroupId + "\\t"
RG += "LB:" + libraryId + "\\t"
RG += "LB:" + libId + "\\t"
RG += "PL:" + platform + "\\t"
RG += "PU:" + platformUnit + "\\t"
RG += "SM:" + sampleId + "\\t"
......
......@@ -59,16 +59,16 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
val inputFastq: File = config("R1", required = true)
val prefixFastq: File = createFile(".prefix.fastq")
val flexiprep = new Flexiprep(qscript)
flexiprep.sampleId = sampleId
flexiprep.libraryId = libraryId
flexiprep.libId = libId
val mapping = new Mapping(qscript)
mapping.libraryId = libraryId
mapping.libId = libId
mapping.sampleId = sampleId
protected def addJobs(): Unit = {
......@@ -93,8 +93,8 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
qscript.addAll(mapping.functions)
if (config("library_counts", default = false).asBoolean) {
addBedtoolsCounts(mapping.finalBamFile, sampleId + "-" + libraryId, libDir)
addTablibCounts(pf.outputFastq, sampleId + "-" + libraryId, libDir)
addBedtoolsCounts(mapping.finalBamFile, sampleId + "-" + libId, libDir)
addTablibCounts(pf.outputFastq, sampleId + "-" + libId, libDir)
}
}
}
......
......@@ -124,11 +124,11 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with
// Called for each run from a sample
def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
def runSingleLibraryJobs(libId: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val alignmentDir: String = outputDir + sampleID + "/alignment/"
val runDir: String = alignmentDir + "run_" + libraryId + "/"
val runDir: String = alignmentDir + "run_" + libId + "/"
if (config.contains("R1")) {
val mapping = new Mapping(this)
......@@ -140,7 +140,7 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with
mapping.input_R1 = config("R1")
mapping.input_R2 = config("R2")
mapping.paired = (mapping.input_R2 != null)
mapping.RGLB = libraryId
mapping.RGLB = libId
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
......@@ -154,7 +154,7 @@ class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with
// start sambamba dedup
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libId)
return libraryOutput
// logger.debug(outputFiles)
// return outputFiles
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment