Commit 4fc971d4 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'feature-dict_check' into 'develop'

Added check on sequence dictionary

Fixes #283 

See merge request !325
parents abe8e6ec af0d834a
......@@ -65,8 +65,10 @@ class AddOrReplaceReadGroups(val root: Configurable) extends Picard {
/** Returns command to execute */
override def cmdLine = super.cmdLine +
required("INPUT=", input, spaceSeparated = false) +
required("OUTPUT=", output, spaceSeparated = false) +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("SORT_ORDER=", sortOrder, spaceSeparated = false) +
required("RGID=", RGID, spaceSeparated = false) +
required("RGLB=", RGLB, spaceSeparated = false) +
......
......@@ -34,6 +34,9 @@ class ReorderSam(val root: Configurable) extends Picard with Reference {
@Output(doc = "Output SAM or BAM file", required = true)
var output: File = null
@Output(doc = "The output file to bam file to", required = true)
lazy val outputIndex: File = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai")
@Argument(doc = "Allow incomplete dict concordance", required = false)
var allowIncompleteDictConcordance: Boolean = config("allow_incomplete_dict_concordance", default = false)
......@@ -49,6 +52,8 @@ class ReorderSam(val root: Configurable) extends Picard with Reference {
conditional(allowIncompleteDictConcordance, "ALLOW_INCOMPLETE_DICT_CONCORDANCE=TRUE") +
conditional(allowContigLengthDiscordance, "ALLOW_CONTIG_LENGTH_DISCORDANCE=TRUE") +
required("REFERENCE=", reference, spaceSeparated = false) +
required("INPUT=", input, spaceSeparated = false) +
required("OUTPUT=", output, spaceSeparated = false)
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false))
}
......@@ -37,7 +37,7 @@ class Carp(val root: Configurable) extends QScript with MultisampleMappingTrait
qscript =>
def this() = this(null)
override def defaults = Map(
override def defaults = super.defaults ++ Map(
"mapping" -> Map(
"skip_markduplicates" -> false,
"aligner" -> "bwa-mem"
......
......@@ -77,7 +77,7 @@ class Gentrap(val root: Configurable) extends QScript
lazy val removeRibosomalReads: Boolean = config("remove_ribosomal_reads", default = false)
/** Default pipeline config */
override def defaults = Map(
override def defaults = super.defaults ++ Map(
"htseqcount" -> (if (strandProtocol.isSet) Map("stranded" -> (strandProtocol() match {
case StrandProtocol.NonSpecific => "no"
case StrandProtocol.Dutp => "reverse"
......
......@@ -3,10 +3,11 @@ package nl.lumc.sasc.biopet.pipelines.mapping
import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq }
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
......@@ -50,6 +51,8 @@ trait MultisampleMappingTrait extends MultiSampleQScript
Some(report)
}
override def defaults = super.defaults ++ Map("reordersam" -> Map("allow_incomplete_dict_concordance" -> true))
/** In a default multisample mapping run there are no multsample jobs. This method can be overriden by other pipelines */
def addMultiSampleJobs(): Unit = {
// this code will be executed after all code of all samples is executed
......@@ -116,7 +119,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
val samToFastq = SamToFastq(qscript, inputBam.get,
new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"),
new File(libDir, sampleId + "-" + libId + ".R2.fq.gz"))
samToFastq.isIntermediate = true
samToFastq.isIntermediate = libraries.size > 1
qscript.add(samToFastq)
mapping.foreach(m => {
m.input_R1 = samToFastq.fastqR1
......@@ -125,39 +128,50 @@ trait MultisampleMappingTrait extends MultiSampleQScript
})
} else {
val inputSam = SamReaderFactory.makeDefault.open(inputBam.get)
val readGroups = inputSam.getFileHeader.getReadGroups
val header = inputSam.getFileHeader
val readGroups = header.getReadGroups
val referenceFile = new FastaSequenceFile(referenceFasta(), true)
val dictOke: Boolean = {
var oke = true
try {
header.getSequenceDictionary.assertSameDictionary(referenceFile.getSequenceDictionary)
} catch {
case e: AssertionError => {
logger.error(e.getMessage)
oke = false
}
}
oke
}
inputSam.close()
referenceFile.close()
val readGroupOke = readGroups.forall(readGroup => {
if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
readGroup.getSample == sampleId && readGroup.getLibrary == libId
})
inputSam.close()
if (!readGroupOke) {
if (correctReadgroups) {
if (!readGroupOke || !dictOke) {
if (!readGroupOke && !correctReadgroups) Logging.addError(
"Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
if (!dictOke) Logging.addError(
"Sequence dictionary in the bam file is not the same as the reference, file: " + bamFile +
"\nPlease note that it is possible to set 'correct_dict' to true in the config to automatic fix this")
if (!readGroupOke && correctReadgroups) {
logger.info("Correcting readgroups, file:" + inputBam.get)
val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get)
aorrg.RGID = s"$sampleId-$libId"
aorrg.RGID = config("rgid", default = s"$sampleId-$libId")
aorrg.RGLB = libId
aorrg.RGSM = sampleId
aorrg.RGPL = "unknown"
aorrg.RGPU = "na"
aorrg.isIntermediate = true
aorrg.RGPL = config("rgpl", default = "unknown")
aorrg.RGPU = config("rgpu", default = "na")
aorrg.isIntermediate = libraries.size > 1
qscript.add(aorrg)
} else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this")
} else {
val oldBamFile: File = inputBam.get
val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai")
val baiLn = Ln(qscript, oldIndex, newIndex)
add(baiLn)
val bamLn = Ln(qscript, oldBamFile, bamFile.get)
bamLn.deps :+= baiLn.output
add(bamLn)
}
}
} else add(Ln.linkBamFile(qscript, inputBam.get, bamFile.get): _*)
val bamMetrics = new BamMetrics(qscript)
bamMetrics.sampleId = Some(sampleId)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment