Commit 5c5ae1cd authored by Peter van 't Hof's avatar Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/fix-BIOPET-750' into fix-BIOPET-750

parents 3b61a8de e32832d3
......@@ -420,11 +420,3 @@ trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with
optional("-l", logging_level) +
optional("-log", log_to_file)
}
object CommandLineGATK {
def isFileWithTag(file: File, tag: String): Boolean = file match {
case f:TaggedFile => f.tag == tag
case _ => false
}
}
\ No newline at end of file
......@@ -2,29 +2,13 @@ package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.CommandLineGATK.isFileWithTag
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
class ContEst(val parent: Configurable) extends CommandLineGATK {
def analysis_type: String = "ContEst"
/** Getter and setter for tumor sample bam file. */
def tumorSampleBam: File = input_file.find(file => isFileWithTag(file, "eval")).orNull
def tumorSampleBam_= (value:File):Unit = {
input_file = input_file.filterNot(file => isFileWithTag(file, "eval"))
input_file :+= TaggedFile(value, "eval")
}
/** Getter and setter for normal sample bam file. */
def normalSampleBam: File = input_file.find(file => isFileWithTag(file, "genotype")).orNull
def normalSampleBam_= (value:File):Unit = {
input_file = input_file.filterNot(file => isFileWithTag(file, "genotype"))
input_file :+= TaggedFile(value, "genotype")
}
/** Variant file containing information about the population allele frequencies. */
@Input(fullName = "popfile", shortName="pf", required = true)
var popFile: File = config("popfile")
......@@ -89,13 +73,3 @@ class ContEst(val parent: Configurable) extends CommandLineGATK {
optional("--trim_fraction", trimFraction)
}
object ContEst {
def apply(parent: Configurable, tumorSampleBam: File, normalSampleBam: File, output: File): ContEst = {
val conest = new ContEst(parent)
conest.tumorSampleBam = tumorSampleBam
conest.normalSampleBam = normalSampleBam
conest.output = output
conest
}
}
\ No newline at end of file
......@@ -17,11 +17,10 @@ package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.{File, FileOutputStream, IOException, PrintWriter}
import nl.lumc.sasc.biopet.tools.vcfstats.Stats.plotHeatmap
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats.{getClass, logger, sampleDistributions}
import scala.collection.mutable
import nl.lumc.sasc.biopet.tools.vcfstats.VcfStats.logger
import nl.lumc.sasc.biopet.utils.{ConfigUtils, sortAnyAny}
import scala.collection.mutable
import scala.sys.process.{Process, ProcessLogger}
/**
......@@ -161,26 +160,32 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
def writeAllOutput(outputDir: File,
samples: List[String],
genotypeFields: List[String] = Nil,
infoFields: List[String] = Nil,
sampleDistributions: List[String] = Nil): Unit = {
genotypeFields: List[String],
infoFields: List[String],
sampleDistributions: List[String],
contig: Option[String]): Unit = {
outputDir.mkdirs()
this.writeToFile(new File(outputDir, "stats.json"),
samples,
genotypeFields,
infoFields,
sampleDistributions)
sampleDistributions,
contig)
writeOverlap(outputDir, samples)
}
def writeToFile(outputFile: File,
samples: List[String],
genotypeFields: List[String] = Nil,
infoFields: List[String] = Nil,
sampleDistributions: List[String] = Nil): Unit = {
genotypeFields: List[String],
infoFields: List[String],
sampleDistributions: List[String],
contig: Option[String]): Unit = {
val allWriter = new PrintWriter(outputFile)
val json = ConfigUtils.mapToJson(
this.getStatsAsMap(samples, genotypeFields, infoFields, sampleDistributions))
val map = this.getStatsAsMap(samples, genotypeFields, infoFields, sampleDistributions)
val json = contig match {
case Some(c) => ConfigUtils.mapToJson(Map("contigs" -> Map(c -> map)))
case _ => ConfigUtils.mapToJson(Map("total" -> map))
}
allWriter.println(json.nospaces)
allWriter.close()
}
......
......@@ -89,7 +89,8 @@ object VcfStats extends ToolCommand {
samples,
adGenotypeTags,
adInfoTags,
sampleDistributions))
sampleDistributions,
None))
regionStats
.flatMap(_._2)
......@@ -100,7 +101,8 @@ object VcfStats extends ToolCommand {
samples,
adGenotypeTags,
adInfoTags,
sampleDistributions)
sampleDistributions,
Some(k))
}
regionStats.unpersist()
......@@ -159,7 +161,8 @@ object VcfStats extends ToolCommand {
samples,
adGenotypeTags,
adInfoTags,
sampleDistributions)
sampleDistributions,
Some(bedRecord.chr))
None
}
} else Future.successful(Some(bedRecord.chr -> stats))
......
......@@ -246,24 +246,24 @@ class VcfStatsTest extends TestNGSuite with Matchers {
def testCheckGeneral(): Unit = {
val record = new VCFFileReader(new File(resourcePath("/chrQ.vcf.gz"))).iterator().next()
val blah = checkGeneral(record, List())
blah.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1))
blah.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1))
blah.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1))
blah.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1))
blah.get("QUAL") shouldEqual Some(Map(1541 -> 1))
blah.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1))
blah.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1))
blah.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1))
blah.get("general") should not be empty
val general = blah("general")
val generalStats = checkGeneral(record, List())
generalStats.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1))
generalStats.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1))
generalStats.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1))
generalStats.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1))
generalStats.get("QUAL") shouldEqual Some(Map(1541 -> 1))
generalStats.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1))
generalStats.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1))
generalStats.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1))
generalStats.get("general") should not be empty
val general = generalStats("general")
general.get("PolymorphicInSamples") shouldEqual Some(1)
general.get("ComplexIndel") shouldEqual Some(0)
......@@ -285,7 +285,7 @@ class VcfStatsTest extends TestNGSuite with Matchers {
general.get("Symbolic") shouldEqual Some(0)
general.get("SimpleInsertion") shouldEqual Some(1)
val total = blah
val total = generalStats
total.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1))
total.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1))
......@@ -300,8 +300,8 @@ class VcfStatsTest extends TestNGSuite with Matchers {
total.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1))
blah.get("general") should not be empty
val totGeneral = blah("general")
generalStats.get("general") should not be empty
val totGeneral = generalStats("general")
totGeneral.get("PolymorphicInSamples") shouldEqual Some(1)
totGeneral.get("ComplexIndel") shouldEqual Some(0)
......@@ -330,17 +330,17 @@ class VcfStatsTest extends TestNGSuite with Matchers {
val genotype = record.getGenotype(0)
val blah = checkGenotype(record, genotype, List())
val genotypeStats = checkGenotype(record, genotype, List())
blah.get("GQ") shouldEqual Some(Map(99 -> 1))
blah.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1))
blah.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1))
blah.get("DP") shouldEqual Some(Map(45 -> 1))
blah.get("AD-alt") shouldEqual Some(Map(21 -> 1))
blah.get("AD-ref") shouldEqual Some(Map(24 -> 1))
blah.get("general") should not be empty
genotypeStats.get("GQ") shouldEqual Some(Map(99 -> 1))
genotypeStats.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1))
genotypeStats.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1))
genotypeStats.get("DP") shouldEqual Some(Map(45 -> 1))
genotypeStats.get("AD-alt") shouldEqual Some(Map(21 -> 1))
genotypeStats.get("AD-ref") shouldEqual Some(Map(24 -> 1))
genotypeStats.get("general") should not be empty
val general = blah("general")
val general = genotypeStats("general")
general.get("Hom") shouldEqual Some(0)
general.get("NoCall") shouldEqual Some(0)
general.get("Variant") shouldEqual Some(1)
......@@ -355,7 +355,7 @@ class VcfStatsTest extends TestNGSuite with Matchers {
general.get("Het") shouldEqual Some(1)
general.get("HetNonRef") shouldEqual Some(0)
val total = blah
val total = genotypeStats
total.get("GQ") shouldEqual Some(Map(99 -> 1))
total.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1))
total.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1))
......@@ -364,7 +364,7 @@ class VcfStatsTest extends TestNGSuite with Matchers {
total.get("AD-ref") shouldEqual Some(Map(24 -> 1))
total.get("general") should not be empty
val totGeneral = blah("general")
val totGeneral = genotypeStats("general")
totGeneral.get("Hom") shouldEqual Some(0)
totGeneral.get("NoCall") shouldEqual Some(0)
totGeneral.get("Variant") shouldEqual Some(1)
......
......@@ -82,6 +82,10 @@ unifiedgenotyper:
## Supported variant callers
At this moment the following variant callers can be used
##### Germline
When doing variant calling most often germline is used. This will detect variants based on the assumption that there is a fixed number of alleles. Mostly the default used is a ploidy of 2. When this assumption does not hold for your data, somatic variant calling can be a better solution.
| ConfigName | Tool | Description |
| ---------- | ---- | ----------- |
| haplotypecaller_gvcf | <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php">haplotypecaller</a> | Running HaplotypeCaller in gvcf mode |
......@@ -95,6 +99,31 @@ At this moment the following variant callers can be used
| bcftools_singlesample | <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools</a> | |
| varscan_cns_singlesample | <a href="http://varscan.sourceforge.net/">varscan</a> | |
##### Somatic
In contrast to germline variant calling, somatic variant calling does not have a direct assumption about the number of alleles. Some can also take a control into account, like MuTect2. Having a control is useful when analysing tumor samples.
| ConfigName | Tool | Description |
| ---------- | ---- | ----------- |
| mutect2 | <a href="https://software.broadinstitute.org/gatk/gatkdocs/3.7-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php">MuTect2</a> | Running mutect2, requires tumor normal pairs |
| varscan_cns_singlesample | <a href="http://varscan.sourceforge.net/">varscan</a> | |
| raw | [Naive variant caller](../../tools/MpileupToVcf) | |
###### Config for tumor-normal pairs
To define the tumor-normal pairs, the config can look like this:
```yaml
samples:
sample1:
tags:
type: tumor
normal: sample2
sample2:
tags:
type: normal
```
## Config options
### Required settings
......
......@@ -242,7 +242,9 @@ class ShivaVariantcalling(val parent: Configurable)
def summarySettings = Map(
"variantcallers" -> configCallers.toList,
"regions_of_interest" -> roiBedFiles.map(_.getName),
"amplicon_bed" -> ampliconBedFile.map(_.getAbsolutePath)
"amplicon_bed" -> ampliconBedFile.map(_.getAbsolutePath),
"somatic_variant_calling" -> isSomaticVariantCallingConfigured,
"germline_variant_calling" -> isGermlineVariantCallingConfigured
)
/** Files for the summary */
......
......@@ -47,8 +47,8 @@ class MuTect2(val parent: Configurable) extends SomaticVariantCaller {
if (runConEst) {
val namePrefix = outputFile.getAbsolutePath.stripSuffix(".vcf.gz")
val contEst = new gatk.ContEst(this)
inputBams.get(pair.tumorSample).foreach(contEst.input_file :+= _)
inputBams.get(pair.normalSample).foreach(contEst.input_file :+= _)
inputBams.get(pair.tumorSample).foreach(contEst.input_file :+= TaggedFile(_, "eval"))
inputBams.get(pair.normalSample).foreach(contEst.input_file :+= TaggedFile(_, "genotype"))
contEst.output = new File(s"$namePrefix.contamination.txt")
contEst.BQSR = bqsrFile
add(contEst)
......
......@@ -19,7 +19,7 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.{File, FileOutputStream}
import java.io.{File, FileOutputStream, IOException}
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetPipe
......@@ -208,8 +208,15 @@ trait ShivaVariantcallingTestTrait extends TestNGSuite with Matchers {
}
// remove temporary run directory all tests in the class have been run
@AfterClass def removeTempOutputDir(): Unit = {
dirs.foreach(FileUtils.deleteDirectory)
@AfterClass def removeTempOutputDir() = {
dirs.filter(_.exists()).foreach { dir =>
try {
FileUtils.deleteDirectory(dir)
} catch {
case e: IOException if e.getMessage.startsWith("Unable to delete directory") =>
Logging.logger.error(e.getMessage)
}
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment