Commit 3cf8e117 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into feature-general-refactor-biopet

parents c8f56975 9087090a
......@@ -135,6 +135,17 @@ To view all possible config options please navigate to our Gitlab wiki page
Since Shiva uses the [Mapping](mapping.md) pipeline internally, mapping config values can be specified as well.
For all the options, please see the corresponding documentation for the mapping pipeline.
### Exome variant calling
If one calls variants with Shiva on exome samples and a ```amplicon_bed``` file is available, the user is able to add this file to the config file.
When the file is given, the coverage over the positions in the bed file will be calculated plus the number of variants on each position. If there is an interest
in a specific region of the genome/exome one is capable to give multiple ```regionOfInterest.bed``` files with the option ```regions_of_interest``` (in list/array format).
A short recap: the option ```amplicon_bed``` can only be given one time and should be composed of the amplicon kit used to obtain the exome data.
The option ```regions_of_interest``` can contain multiple bed files in ```list``` format and can contain any region a user wants. If multiple regions are given,
the pipeline will make an coverage plot over each bed file separately.
### Modes
Shiva furthermore supports three modes. The default and recommended option is `multisample_variantcalling`.
......
......@@ -83,13 +83,14 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/** Set default output file, threads and vmem for current job */
final def internalBeforeGraph(): Unit = {
pipesJobs.foreach(_.beforeGraph())
pipesJobs.foreach(_.internalBeforeGraph())
_pipesJobs.foreach(_.beforeGraph())
_pipesJobs.foreach(_.internalBeforeGraph())
}
/**
* Can override this value is executable may not be converted to CanonicalPath
*
* @deprecated
*/
val executableToCanonicalPath = true
......@@ -121,6 +122,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/**
* This operator sends stdout to `that` and combine this into 1 command line function
*
* @param that Function that will read from stdin
* @return BiopetPipe function
*/
......@@ -141,6 +143,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/**
* This operator can be used to give a program a file as stdin
*
* @param file File that will become stdin for this program
* @return It's own class
*/
......@@ -152,6 +155,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/**
* This operator can be used to give a program a file write it's atdout
*
* @param file File that will become stdout for this program
* @return It's own class
*/
......@@ -169,6 +173,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/**
* This function needs to be implemented to define the command that is executed
*
* @return Command to run
*/
protected[core] def cmdLine: String
......@@ -176,6 +181,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/**
* implementing a final version of the commandLine from org.broadinstitute.gatk.queue.function.CommandLineFunction
* User needs to implement cmdLine instead
*
* @return Command to run
*/
override final def commandLine: String = {
......@@ -187,10 +193,11 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
cmd
}
private[core] var pipesJobs: List[BiopetCommandLineFunction] = Nil
private[core] var _pipesJobs: List[BiopetCommandLineFunction] = Nil
def pipesJobs = _pipesJobs
def addPipeJob(job: BiopetCommandLineFunction) {
pipesJobs :+= job
pipesJobs = pipesJobs.distinct
_pipesJobs :+= job
_pipesJobs = _pipesJobs.distinct
}
}
......
......@@ -67,8 +67,8 @@ class BiopetFifoPipe(val root: Configurable,
deps :::= inputs.values.toList.flatten.filter(!fifoFiles.contains(_))
deps = deps.distinct
pipesJobs :::= commands
pipesJobs = pipesJobs.distinct
_pipesJobs :::= commands
_pipesJobs = _pipesJobs.distinct
}
override def beforeCmd(): Unit = {
......
......@@ -41,7 +41,7 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo
case e: Exception => Nil
}
pipesJobs :::= commands
_pipesJobs :::= commands
override def beforeGraph() {
super.beforeGraph()
......@@ -61,7 +61,7 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo
}
override def setResources(): Unit = {
combineResources(pipesJobs)
combineResources(_pipesJobs)
}
override def setupRetry(): Unit = {
......
......@@ -37,7 +37,92 @@ class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with R
@Output(required = true)
var outputVcf: File = null
@Input(required = false)
var bam_list: Option[File] = config("bam_list")
@Input(required = false)
var targets: Option[File] = config("targets")
@Input(required = false)
var samples: Option[File] = config("samples")
@Input(required = false)
var populations: Option[File] = config("populations")
@Input(required = false)
var cnv_map: Option[File] = config("cnv_map")
@Input(required = false)
var trace: Option[File] = config("trace")
@Input(required = false)
var failed_alleles: Option[File] = config("failed_alleles")
@Input(required = false)
var observation_bias: Option[File] = config("observation_bias")
@Input(required = false)
var contamination_estimates: Option[File] = config("contamination_estimates")
@Input(required = false)
var variant_input: Option[File] = config("variant_input")
@Input(required = false)
var haplotype_basis_alleles: Option[File] = config("haplotype_basis_alleles")
var pvar: Option[Int] = config("pvar")
var theta: Option[Int] = config("theta")
var ploidy: Option[Int] = config("ploidy")
var use_best_n_alleles: Option[Int] = config("use_best_n_alleles")
var max_complex_gap: Option[Int] = config("max_complex_gap")
var min_repeat_size: Option[Int] = config("min_repeat_size")
var min_repeat_entropy: Option[Int] = config("min_repeat_entropy")
var read_mismatch_limit: Option[Int] = config("read_mismatch_limit")
var read_max_mismatch_fraction: Option[Int] = config("read_max_mismatch_fraction")
var read_snp_limit: Option[Int] = config("read_snp_limit")
var read_indel_limit: Option[Int] = config("read_indel_limit")
var min_alternate_fraction: Option[Double] = config("min_alternate_fraction")
var min_alternate_count: Option[Int] = config("min_alternate_count")
var min_alternate_qsum: Option[Int] = config("min_alternate_qsum")
var min_alternate_total: Option[Int] = config("min_alternate_total")
var min_coverage: Option[Int] = config("min_coverage")
var genotyping_max_iterations: Option[Int] = config("genotyping_max_iterations")
var genotyping_max_banddepth: Option[Int] = config("genotyping_max_banddepth")
var genotype_variant_threshold: Option[Int] = config("genotype_variant_threshold")
var read_dependence_factor: Option[Int] = config("read_dependence_factor")
var min_mapping_quality: Option[Double] = config("min_mapping_quality")
var min_base_quality: Option[Double] = config("min_base_quality")
var min_supporting_allele_qsum: Option[Double] = config("min_supporting_allele_qsum")
var min_supporting_mapping_qsum: Option[Double] = config("min_supporting_mapping_qsum")
var mismatch_base_quality_threshold: Option[Double] = config("mismatch_base_quality_threshold")
var base_quality_cap: Option[Double] = config("base_quality_cap")
var prob_contamination: Option[Double] = config("prob_contamination")
var only_use_input_alleles: Boolean = config("only_use_input_alleles", default = false)
var report_all_haplotype_alleles: Boolean = config("report_all_haplotype_alleles", default = false)
var report_monomorphic: Boolean = config("report_monomorphic", default = false)
var pooled_discrete: Boolean = config("pooled_discrete", default = false)
var pooled_continuous: Boolean = config("pooled_continuous", default = false)
var use_reference_allele: Boolean = config("use_reference_allele", default = false)
var no_snps: Boolean = config("no_snps", default = false)
var no_indels: Boolean = config("no_indels", default = false)
var no_mnps: Boolean = config("no_mnps", default = false)
var no_complex: Boolean = config("no_complex", default = false)
var no_partial_observations: Boolean = config("no_partial_observations", default = false)
var dont_left_align_indels: Boolean = config("dont_left_align_indels", default = false)
var use_duplicate_reads: Boolean = config("use_duplicate_reads", default = false)
var standard_filters: Boolean = config("standard_filters", default = false)
var no_population_priors: Boolean = config("no_population_priors", default = false)
var hwe_priors_off: Boolean = config("hwe_priors_off", default = false)
var binomial_obs_priors_off: Boolean = config("binomial_obs_priors_off", default = false)
var allele_balance_priors_off: Boolean = config("allele_balance_priors_off", default = false)
var legacy_gls: Boolean = config("legacy_gls", default = false)
var report_genotype_likelihood_max: Boolean = config("report_genotype_likelihood_max", default = false)
var exclude_unobserved_genotypes: Boolean = config("exclude_unobserved_genotypes", default = false)
var use_mapping_quality: Boolean = config("use_mapping_quality", default = false)
var harmonic_indel_quality: Boolean = config("harmonic_indel_quality", default = false)
var genotype_qualities: Boolean = config("genotype_qualities", default = false)
var debug: Boolean = config("debug", default = logger.isDebugEnabled)
var haplotypeLength: Option[Int] = config("haplotype_length")
executable = config("exe", default = "freebayes")
......@@ -52,7 +137,70 @@ class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with R
def cmdLine = executable +
required("--fasta-reference", reference) +
repeat("--bam", bamfiles) +
optional("--vcf", outputVcf) +
optional("--bam-list", bam_list) +
optional("--targets", targets) +
optional("--samples", samples) +
optional("--populations", populations) +
optional("--cnv-map", cnv_map) +
optional("--trace", trace) +
optional("--failed-alleles", failed_alleles) +
optional("--observation-bias", observation_bias) +
optional("--contamination-estimates", contamination_estimates) +
optional("--variant-input", variant_input) +
optional("--haplotype-basis-alleles", haplotype_basis_alleles) +
optional("--pvar", pvar) +
optional("--theta", theta) +
optional("--ploidy", ploidy) +
optional("--haplotype-length", haplotypeLength)
optional("--use-best-n-alleles", use_best_n_alleles) +
optional("--max-complex-gap", max_complex_gap) +
optional("--min-repeat-size", min_repeat_size) +
optional("--min-repeat-entropy", min_repeat_entropy) +
optional("--read-mismatch-limit", read_mismatch_limit) +
optional("--read-max-mismatch-fraction", read_max_mismatch_fraction) +
optional("--read-snp-limit", read_snp_limit) +
optional("--read-indel-limit", read_indel_limit) +
optional("--min-alternate-fraction", min_alternate_fraction) +
optional("--min-alternate-count", min_alternate_count) +
optional("--min-alternate-qsum", min_alternate_qsum) +
optional("--min-alternate-total", min_alternate_total) +
optional("--min-coverage", min_coverage) +
optional("--genotyping-max-iterations", genotyping_max_iterations) +
optional("--genotyping-max-banddepth", genotyping_max_banddepth) +
optional("--genotype-variant-threshold", genotype_variant_threshold) +
optional("--read-dependence-factor", read_dependence_factor) +
optional("--min-mapping-quality", min_mapping_quality) +
optional("--min-base-quality", min_base_quality) +
optional("--min-supporting-allele-qsum", min_supporting_allele_qsum) +
optional("--min-supporting-mapping-qsum", min_supporting_mapping_qsum) +
optional("--mismatch-base-quality-threshold", mismatch_base_quality_threshold) +
optional("--base-quality-cap", base_quality_cap) +
optional("--prob-contamination", prob_contamination) +
conditional(only_use_input_alleles, "--only-use-input-alleles") +
conditional(report_all_haplotype_alleles, "--report-all-haplotype-alleles") +
conditional(report_monomorphic, "--report-monomorphic") +
conditional(pooled_discrete, "--pooled-discrete") +
conditional(pooled_continuous, "--pooled-continuous") +
conditional(use_reference_allele, "--use-reference-allele") +
conditional(no_snps, "--no-snps") +
conditional(no_indels, "--no-indels") +
conditional(no_mnps, "--no-mnps") +
conditional(no_complex, "--no-complex") +
conditional(no_partial_observations, "--no-partial-observations") +
conditional(dont_left_align_indels, "--dont-left-align-indels") +
conditional(use_duplicate_reads, "--use-duplicate-reads") +
conditional(standard_filters, "--standard-filters") +
conditional(no_population_priors, "--no-population-priors") +
conditional(hwe_priors_off, "--hwe-priors-off") +
conditional(binomial_obs_priors_off, "--binomial-obs-priors-off") +
conditional(allele_balance_priors_off, "--allele-balance-priors-off") +
conditional(legacy_gls, "--legacy-gls") +
conditional(report_genotype_likelihood_max, "--report-genotype-likelihood-max") +
conditional(exclude_unobserved_genotypes, "--exclude-unobserved-genotypes") +
conditional(use_mapping_quality, "--use-mapping-quality") +
conditional(harmonic_indel_quality, "--harmonic-indel-quality") +
conditional(genotype_qualities, "--genotype-qualities") +
conditional(debug, "--debug") +
optional("--haplotype-length", haplotypeLength) +
(if (inputAsStdin) required("--stdin") else "") +
(if (outputAsStsout) "" else optional("--vcf", outputVcf))
}
......@@ -148,6 +148,8 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
// ought to be a flag, but is BUG in VEP; becomes numeric ("1" is true)
var failed: Option[Int] = config("failed")
override def defaultCoreMemory = 4.0
override def beforeGraph(): Unit = {
super.beforeGraph()
if (!cache && !database) {
......@@ -155,6 +157,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
} else if (cache && dir.isEmpty) {
Logging.addError("Must supply dir to cache for VariantEffectPredictor")
}
if (stats_text) outputFiles :+= new File(output.getAbsolutePath + "_summary.txt")
}
/** Returns command to execute */
......
......@@ -135,7 +135,12 @@ class Bowtie2(val root: Configurable) extends BiopetCommandLineFunction with Ref
val indexDir = new File(bowtieIndex).getParentFile
val basename = bowtieIndex.stripPrefix(indexDir.getPath + File.separator)
if (indexDir.exists()) {
if (!indexDir.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".bt2")))
if (!indexDir.list()
.toList
.filter(_.startsWith(basename))
.exists({ p =>
p.endsWith(".bt2") || p.endsWith(".bt2l")
}))
Logging.addError(s"No index files found for bowtie2 in: $indexDir with basename: $basename")
}
}
......
......@@ -53,7 +53,7 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
def versionCommand = executable + " --version"
override def defaultCoreMemory = 15.0
override def defaultCoreMemory = 17.0
override def defaultThreads = 4
......
......@@ -17,8 +17,8 @@ class PickClosedReferenceOtus(val root: Configurable) extends BiopetCommandLineF
var outputDir: File = null
override def defaultThreads = 2
override def defaultCoreMemory = 10.0
override def defaultThreads = 3
override def defaultCoreMemory = 12.0
def versionCommand = executable + " --version"
def versionRegex = """Version: (.*)""".r
......
......@@ -14,14 +14,19 @@
def getPlot(read:String) = {
summary.getLibraryValue(sampleId.get, libId.get, "flexiprep", "files", read, plot, "path").collect {
case value => {
val file = new File(value.toString)
case path => {
val file = new File(path.toString)
val newFile = new File(outputDir, read + "_" + file.getName)
if (file.exists()) FileUtils.copyFile(file, newFile)
newFile.getName
}
}
}
def plotAvailable(read:String) = {
new File(summary.getLibraryValue(sampleId.get, libId.get, "flexiprep", "files", read, plot, "path").get.toString).exists()
}
}#
<div class="row">
......@@ -32,11 +37,19 @@
<div class="row">
<div class="col-md-1"><b>R1</b></div>
<div class="col-md-5">
#if (plotAvailable( "fastqc_R1" ))
<img class="img-responsive" src="${getPlot("fastqc_R1")}" />
#else
Image was not generated by FastQC
#end
</div>
#if (!skipTrim || !skipClip)
<div class="col-md-5">
#if (plotAvailable( "fastqc_R1_qc" ))
<img class="img-responsive" src="${getPlot("fastqc_R1_qc")}" />
#else
Image was not generated by FastQC
#end
</div>
#end
</div>
......@@ -44,11 +57,19 @@
<div class="row">
<div class="col-md-1"><b>R2</b></div>
<div class="col-md-5">
#if (plotAvailable( "fastqc_R2" ))
<img class="img-responsive" src="${getPlot("fastqc_R2")}" />
#else
Image was not generated by FastQC
#end
</div>
#if (!skipTrim || !skipClip)
<div class="col-md-5">
#if (plotAvailable( "fastqc_R2_qc" ))
<img class="img-responsive" src="${getPlot("fastqc_R2_qc")}" />
#else
Image was not generated by FastQC
#end
</div>
#end
</div>
......
......@@ -62,7 +62,7 @@ object FlexiprepReport extends ReportBuilder {
fastqcPlotSection("Sequence quality", "plot_per_sequence_quality"),
fastqcPlotSection("Base GC content", "plot_per_base_gc_content"),
fastqcPlotSection("Sequence GC content", "plot_per_sequence_gc_content"),
fastqcPlotSection("Base seqeunce content", "plot_per_base_sequence_content"),
fastqcPlotSection("Base sequence content", "plot_per_base_sequence_content"),
fastqcPlotSection("Duplication", "plot_duplication_levels"),
fastqcPlotSection("Kmers", "plot_kmer_profiles"),
fastqcPlotSection("Length distribution", "plot_sequence_length_distribution")
......@@ -71,7 +71,7 @@ object FlexiprepReport extends ReportBuilder {
)
protected def fastqcPlotSection(name: String, tag: String) = {
name -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastaqcPlot.ssp", Map("plot" -> tag))
name -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepFastQcPlot.ssp", Map("plot" -> tag))
}
/**
......
......@@ -14,15 +14,8 @@ class Freebayes(val root: Configurable) extends Variantcaller {
val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(this)
fb.bamfiles = inputBams.values.toList
fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf")
fb.isIntermediate = true
add(fb)
add(fb | new Bgzip(this) > outputFile)
//TODO: need piping for this, see also issue #114
val bz = new Bgzip(this)
bz.input = List(fb.outputVcf)
bz.output = outputFile
add(bz)
add(Tabix.apply(this, bz.output))
add(Tabix.apply(this, outputFile))
}
}
......@@ -18,16 +18,17 @@ package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.{ File, FileOutputStream }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.core.BiopetPipe
import nl.lumc.sasc.biopet.extensions.Freebayes
import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.tools.VcfFilter
import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.apache.commons.io.FileUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ AfterClass, DataProvider, Test }
import org.testng.annotations.{ DataProvider, Test }
import scala.collection.mutable.ListBuffer
......@@ -88,11 +89,13 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
pipeline.init()
pipeline.script()
val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetPipe]).flatMap(_.asInstanceOf[BiopetPipe].pipesJobs)
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0))
//pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
//FIXME: Can not check for bcftools because of piping
pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0)
//pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
pipesJobs.count(_.isInstanceOf[BcftoolsCall]) shouldBe (if (bcftools) 1 else 0) + (if (bcftoolsSinglesample) bams else 0)
pipeline.functions.count(_.isInstanceOf[BcftoolsMerge]) shouldBe (if (bcftoolsSinglesample && bams > 1) 1 else 0)
pipesJobs.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0)
pipesJobs.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment