Commit 74f04eef authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-500

parents 99cf9848 725ed53a
......@@ -58,9 +58,9 @@ trait Reference extends Configurable {
}
lazy val geneAnnotationVersion: Option[String] = config("gene_annotation_name")
lazy val geneAnnotationSubPath = geneAnnotationVersion.map(x => List("gene_annotations", x)).getOrElse(Nil)
lazy val dbsnpVersion: Option[Int] = config("dbsnp_version")
lazy val dbsnpSubPath: List[String] = dbsnpVersion.map(x => List("dbsnp_annotations", x.toString)).getOrElse(Nil)
lazy val geneAnnotationSubPath = geneAnnotationVersion.map(List("gene_annotations", _)).getOrElse(Nil)
lazy val dbsnpVersion: Option[String] = config("dbsnp_version")
lazy val dbsnpSubPath: List[String] = dbsnpVersion.map(List("dbsnp_annotations", _)).getOrElse(Nil)
def dbsnpVcfFile: Option[File] = config("dbsnp_vcf", extraSubPath = dbsnpSubPath)
/** Returns the reference config path */
......
......@@ -30,6 +30,12 @@ import scala.io.Source
*/
class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version with Summarizable {
lazy val vepVersion = new LazyCheck({
val s: Option[String] = config("vep_version")
s
})
vepVersion()
executable = config("exe", namespace = "perl", default = "perl")
var vepScript: String = config("vep_script")
......@@ -39,13 +45,8 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
@Output(doc = "output file", required = true)
var output: File = null
lazy val vepConfig = new LazyCheck({
val s: Option[String] = config("vep_config")
s
})
override def subPath = {
if (vepConfig.isSet) super.subPath ++ vepConfig()
if (vepVersion.isSet) super.subPath ++ List("vep_settings") ++ vepVersion()
else super.subPath
}
......
......@@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 18/11/2016.
......@@ -36,6 +36,9 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
}
}
@Output
private var outputFiles: List[File] = Nil
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
......@@ -44,6 +47,10 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
override def beforeGraph() {
super.beforeGraph()
deps :+= new File(bamFile.getAbsolutePath.replaceAll(".bam$", ".bai"))
outputFiles :+= bamstatsSummary
outputFiles :+= flagstatSummaryFile()
outputFiles :+= mappingQualityFile()
outputFiles :+= clipingFile()
jobOutputFile = new File(outputDir, ".bamstats.out")
if (reference == null) reference = referenceFasta()
}
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Biopet/Queue wrapper around the [[nl.lumc.sasc.biopet.tools.ValidateVcf]] tool.
 *
 * Checks a vcf file against a reference fasta; builds the command line
 * `-i <inputVcf> -R <reference> [--disableFail]`.
 *
 * @param root the parent [[Configurable]] this job inherits config from
 */
class ValidateVcf(val root: Configurable) extends ToolCommandFunction with Reference {
  // Tool object whose main() this job executes.
  def toolObject = nl.lumc.sasc.biopet.tools.ValidateVcf

  /** Vcf file to validate. */
  @Input(required = true)
  var inputVcf: File = _

  /** Reference fasta to validate against; defaults to referenceFasta() in beforeGraph(). */
  @Input(required = true)
  var reference: File = _

  /** When true the tool logs errors instead of failing (passes --disableFail). */
  var disableFail: Boolean = false

  // Whole-genome bed parsing of the reference needs more memory than the default.
  override def defaultCoreMemory = 4.0

  /** Fills in the configured reference when none was set explicitly. */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    if (reference == null) reference = referenceFasta()
  }

  override def cmdLine = super.cmdLine +
    required("-i", inputVcf) +
    required("-R", reference) +
    conditional(disableFail, "--disableFail")
}
......@@ -45,6 +45,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.SquishBed,
nl.lumc.sasc.biopet.tools.SummaryToTsv,
nl.lumc.sasc.biopet.tools.ValidateFastq,
nl.lumc.sasc.biopet.tools.ValidateVcf,
nl.lumc.sasc.biopet.tools.VcfFilter,
nl.lumc.sasc.biopet.tools.VcfStats,
nl.lumc.sasc.biopet.tools.VcfToTsv,
......
package nl.lumc.sasc.biopet.tools
import java.io.File
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import scala.collection.JavaConversions._
/**
* Created by pjvanthof on 10/12/2016.
*/
/**
 * Validates that every record in a vcf file fits on its reference:
 * the contig must exist in the reference fasta and the start/end
 * positions must lie within that contig.
 *
 * Exits non-zero on the first invalid record unless `--disableFail`
 * is given, in which case errors are only logged.
 */
object ValidateVcf extends ToolCommand {
  case class Args(inputVcf: File = null,
                  reference: File = null,
                  failOnError: Boolean = true) extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('i', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(inputVcf = x)
    } text "Vcf file to check"
    opt[File]('R', "reference") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
      c.copy(reference = x)
    } text "Reference fasta to check vcf file against"
    // Flag option: takes no value, so no valueName (was a copy-paste leftover).
    opt[Unit]("disableFail") maxOccurs 1 action { (_, c) =>
      c.copy(failOnError = false)
    } text "Do not fail on error. The tool will still exit when encountering an error, but will do so with exit code 0"
  }

  /**
   * Program entry point.
   *
   * @param args command line arguments, see [[OptParser]]
   * @throws IllegalArgumentException on the first invalid record, unless --disableFail was given
   */
  def main(args: Array[String]): Unit = {
    logger.info("Start")
    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    // Contig names and lengths taken from the reference fasta (.dict/.fai).
    val regions = BedRecordList.fromReference(cmdArgs.reference)

    val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false)
    try {
      for (record <- vcfReader.iterator()) {
        val contig = record.getContig
        require(regions.chrRecords.contains(contig),
          s"The following contig in the vcf file does not exist in the reference: ${contig}")
        val start = record.getStart
        val end = record.getEnd
        // Check the start <= end invariant first; the message used to claim the
        // opposite ("end larger than start"), which is what a *valid* record has.
        require(start <= end, "Start position of variant is larger than end position. This should not be possible")
        // fromReference yields one record per contig, so head holds its bounds.
        val contigStart = regions.chrRecords(contig).head.start
        val contigEnd = regions.chrRecords(contig).head.end
        require(start >= contigStart && start <= contigEnd,
          s"The following position does not exist on reference: ${contig}:$start")
        if (end != start) require(end >= contigStart && end <= contigEnd,
          s"The following position does not exist on reference: ${contig}:$end")
      }
    } catch {
      case e: IllegalArgumentException =>
        if (cmdArgs.failOnError) throw e
        else logger.error(e.getMessage)
    } finally vcfReader.close() // close even when an unexpected exception escapes

    logger.info("Done")
  }
}
##fileformat=VCFv4.1
##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103
chrNotExist 1042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:
c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717
package nl.lumc.sasc.biopet.tools
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvan_thof on 12-12-16.
*/
/** Tests for [[ValidateVcf]] using the bundled chrQ test resources. */
class ValidateVcfTest extends TestNGSuite with Matchers {
  private def resourcePath(p: String): String =
    Paths.get(getClass.getResource(p).toURI).toString

  @Test
  def testMain(): Unit = {
    val reference = resourcePath("/fake_chrQ.fa")

    // A vcf that matches the reference validates cleanly.
    noException shouldBe thrownBy {
      ValidateVcf.main(Array("-i", resourcePath("/chrQ2.vcf"), "-R", reference))
    }

    // A vcf with an unknown contig makes validation fail.
    an[IllegalArgumentException] shouldBe thrownBy {
      ValidateVcf.main(Array("-i", resourcePath("/chrQ_wrong_contig.vcf"), "-R", reference))
    }

    // The same invalid vcf passes when failing is disabled.
    noException shouldBe thrownBy {
      ValidateVcf.main(Array("-i", resourcePath("/chrQ_wrong_contig.vcf"), "-R", reference, "--disableFail"))
    }
  }
}
......@@ -58,6 +58,7 @@ All other values should be provided in the config. Specific config values toward
| readgroup_sequencing_center | String (optional) | Read group sequencing center |
| readgroup_description | String (optional) | Read group description |
| predicted_insertsize | Integer (optional) | Read group predicted insert size |
| keep_final_bam_file | Boolean (default true) | When set to false the pipeline may remove the final bam file once it is no longer required by other jobs |
It is possible to provide any config value as a command line argument as well, using the `-cv` flag.
E.g. `-cv reference=<path/to/reference>` would set value `reference`.
......
......@@ -55,6 +55,27 @@ With that in mind, an example configuration using mode `standard` of the VepNorm
}
~~~
Multiple global configurations
------------------------------
It's possible to make a global config with multiple versions of the cache / vep in there. This way a user only needs to supply the `vep_version` config value.
Global config:
``` yaml
vep_settings:
86:
vep_script: <path_to_exe>
dir: <path_to_cache>
75:
vep_script: <path_to_exe>
dir: <path_to_cache>
```
User config:
``` yaml
vep_version: 75 or 86
```
Varda
-----
Annotation with a [Varda](http://varda.readthedocs.org/en/latest/) database instance is possible.
......
......@@ -93,19 +93,23 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
/** Readgroup predicted insert size */
protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
protected var paired: Boolean = false
val flexiprep = new Flexiprep(this)
def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
def finalBamFile: File = if (skipMarkduplicates) {
new File(outputDir, outputName + ".bam")
} else new File(outputDir, outputName + ".dedup.bam")
/** location of summary file */
def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")
override def defaults = Map(
override def defaults: Map[String, Any] = Map(
"gsnap" -> Map("batch" -> 4),
"star" -> Map("outsamunmapped" -> "Within")
)
override def fixedValues = Map(
override def fixedValues: Map[String, Any] = Map(
"gsnap" -> Map("format" -> "sam"),
"bowtie" -> Map("sam" -> true)
)
......@@ -255,11 +259,13 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
var bamFile = bamFiles.head
if (!skipMarkduplicates) {
bamFile = new File(outputDir, outputName + ".dedup.bam")
val md = MarkDuplicates(this, bamFiles, bamFile)
val md = MarkDuplicates(this, bamFiles, finalBamFile)
md.isIntermediate = !keepFinalBamFile
add(md)
addSummarizable(md, "mark_duplicates")
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile
add(mergeSamFile)
bamFile = mergeSamFile.output
}
......@@ -270,9 +276,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addSummaryQScript(bamMetrics)
}
add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
add(Ln(this, bamFile, finalBamFile))
outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
outputFiles += ("finalBamFile" -> finalBamFile)
if (config("unmapped_to_gears", default = false).asBoolean) {
val gears = new GearsSingle(this)
......@@ -331,7 +335,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
val sortSam = SortSam(this, samFile, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(sortSam)
sortSam.output
}
......@@ -345,7 +349,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = bwaCommand | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = -1
add(pipe)
output
......@@ -363,6 +367,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
pipe.threadsCorrection = -2
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
ar._2
}
......@@ -386,7 +391,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = hisat2 | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = 1
add(pipe)
......@@ -430,9 +435,11 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val reorderSam = new ReorderSam(this)
reorderSam.input = mergeSamFile.output
reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
reorderSam.isIntermediate = true
add(reorderSam)
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(ar._1)
ar._2
}
......@@ -459,7 +466,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
stampyCmd.isIntermediate = true
add(stampyCmd)
val sortSam = SortSam(this, stampyCmd.output, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(sortSam)
sortSam.output
}
......@@ -478,6 +485,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val ar = addAddOrReplaceReadGroups(bowtie.output, output)
val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
pipe.threadsCorrection = -1
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
ar._2
}
......@@ -495,7 +503,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = bowtie2 | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = -1
add(pipe)
output
......@@ -517,6 +525,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
pipe.threadsCorrection = -3
zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
reorderSam.output
}
......@@ -531,6 +540,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
addAll(starCommand._2)
val ar = addAddOrReplaceReadGroups(starCommand._1, output)
ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(ar._1)
ar._2
}
......@@ -547,7 +557,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addOrReplaceReadGroups.RGSM = sampleId.get
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
addOrReplaceReadGroups.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
(addOrReplaceReadGroups, addOrReplaceReadGroups.output)
}
......
......@@ -83,10 +83,10 @@ trait MultisampleMappingTrait extends MultiSampleQScript
"merge_strategy" -> mergeStrategy.toString)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
class Sample(sampleId: String) extends AbstractSample(sampleId) { sample =>
def makeLibrary(id: String) = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
class Library(libId: String) extends AbstractLibrary(libId) { lib =>
/** By default the bams files are put in the summary, more files can be added here */
def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) ::
......@@ -101,22 +101,28 @@ trait MultisampleMappingTrait extends MultiSampleQScript
lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false)
lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false)
lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
val m = new Mapping(qscript)
def keepFinalBamfile = samples(sampleId).libraries.size == 1
lazy val mapping: Option[Mapping] = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
val m: Mapping = new Mapping(qscript) {
override def configNamespace = "mapping"
override def defaults: Map[String, Any] = super.defaults ++
Map("keep_final_bamfile" -> keepFinalBamfile)
}
m.sampleId = Some(sampleId)
m.libId = Some(libId)
m.outputDir = libDir
Some(m)
} else None
def bamFile = mapping match {
def bamFile: Option[File] = mapping match {
case Some(m) => Some(m.finalBamFile)
case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
case _ => None
}
/** By default the preProcessBam is the same as the normal bamFile. A pipeline can extend this if there are preprocess steps */
def preProcessBam = bamFile
def preProcessBam: Option[File] = bamFile
/** This method can be extended to add jobs to the pipeline, to do this the super call of this function must be called by the pipelines */
def addJobs(): Unit = {
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.File
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.Input
import scala.io.Source
/**
 * This class checks results of [[nl.lumc.sasc.biopet.tools.ValidateVcf]] and aborts the pipeline when an error has been found
*
* Created by pjvanthof on 16/08/15.
*/
class CheckValidateVcf extends InProcessFunction {
@Input(required = true)
var inputLogFile: File = _
/** Exits whenever the input md5sum is not the same as the output md5sum */
def run: Unit = {