Commit d3b973b0 authored by Peter van 't Hof

Merge branch 'develop' into feature-bamToBigWig

Conflicts:
	public/biopet-public-package/pom.xml
	public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala
	public/pom.xml
parents 96534e9b cda615ea
@@ -72,16 +72,16 @@ Global setting examples are:
#### Example settings config
~~~
{
"reference": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/ucsc.hg19_nohap.fasta",
"dbsnp": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
"reference": "/references/hg19_nohap/ucsc.hg19_nohap.fasta",
"dbsnp": "/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
"joint_variantcalling": false,
"haplotypecaller": { "scattercount": 100 },
"multisample": { "haplotypecaller": { "scattercount": 1000 } },
"picard": { "validationstringency": "LENIENT" },
"library_variantcalling_temp": true,
"target_bed_temp": "/data/LGTC/projects/vandoorn-melanoma/analysis/target.bed",
"target_bed_temp": "analysis/target.bed",
"min_dp": 5,
"bedtools": {"exe":"/share/isilon/system/local/BEDtools/bedtools-2.17.0/bin/bedtools"},
"bedtools": {"exe":"/BEDtools/bedtools-2.17.0/bin/bedtools"},
"bam_to_fastq": true,
"baserecalibrator": { "memory_limit": 8, "vmem":"16G" },
"samtofastq": {"memory_limit": 8, "vmem": "16G"},
@@ -95,4 +95,4 @@ Global setting examples are:
### JSON validation
To check whether the JSON file you created is valid, there are several options; the simplest is to use [this](http://jsonformatter.curiousconcept.com/)
website. It is also possible to validate the file with Python or Scala, but this requires some more knowledge.
\ No newline at end of file
website. It is also possible to validate the file with Python or Scala, but this requires some more knowledge.
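
For example, a minimal stand-alone Scala sketch for such a check could look like the one below. This is only an illustration, not part of Biopet: it assumes the standard-library JSON parser from `scala.util.parsing.json` is available on the classpath (it ships with older Scala versions), and the `ValidateJson` object name is made up for this example.

~~~
import scala.io.Source
import scala.util.parsing.json.JSON

// Hypothetical stand-alone checker: reports whether a settings file parses as JSON.
object ValidateJson {
  def main(args: Array[String]): Unit = {
    val content = Source.fromFile(args(0)).mkString
    // parseFull returns Some(...) for valid JSON and None for invalid JSON
    JSON.parseFull(content) match {
      case Some(_) => println("Valid JSON: " + args(0))
      case None    => println("Invalid JSON: " + args(0))
    }
  }
}
~~~

A one-line Python check such as `python -c "import json; json.load(open('settings.json'))"` does the same job; it raises an error when the file is not valid JSON.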
@@ -33,7 +33,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
protected def addJobs(): Unit = {}
}
@@ -41,7 +41,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
var outputSnps: FastaOutput = _
protected def addJobs(): Unit = {
addLibsJobs()
addPerLibJobs()
output = addGenerateFasta(sampleId, sampleDir)
outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
}
@@ -56,11 +56,13 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
gatkPipeline.biopetScript
addAll(gatkPipeline.functions)
addSamplesJobs()
}
def addMultiSampleJobs(): Unit = {
val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)
addSamplesJobs()
val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
add(catVariants)
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
@@ -122,13 +124,14 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
val gubbins = new RunGubbins(this)
gubbins.fastafile = concensusVariants
gubbins.startingTree = raxmlBi.getBipartitionsFile
gubbins.startingTree = Some(raxmlBi.getBipartitionsFile)
gubbins.outputDirectory = outputDir + dirSufixGubbins
add(gubbins)
}
addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only")
addTreeJobs(catVariants.output, catConsensusVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels")
}
def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,
......
@@ -12,7 +12,7 @@ class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.q
memoryLimit = Option(4)
override val defaultVmem = "8G"
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1)
if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString)
}
......
@@ -19,7 +19,7 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
if (config.contains("intervals")) intervals = config("intervals").asFileList
if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
reference_sequence = config("reference")
gatk_key = config("gatk_key")
reference_sequence = config("reference", required = true)
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree").asFileList
}
@@ -9,40 +9,40 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral {
override def afterGraph {
super.afterGraph
min_mapping_quality_score = config("minMappingQualityScore", default = 20)
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
nct = config("threads", default = 1)
bamOutput = config("bamOutput")
memoryLimit = Option(nct.getOrElse(1) * 2)
if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
min_mapping_quality_score = config("minMappingQualityScore", default = 20)
scatterCount = config("scattercount", default = 1)
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
if (config.contains("bamOutput")) bamOutput = config("bamOutput")
if (config.contains("allSitePLs")) allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
}
if (config("inputtype", default = "dna").asString == "rna") {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
if (config("inputtype", default = "dna").asString == "rna") {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
override def afterGraph {
super.afterGraph
if (bamOutput != null && nct.getOrElse(1) > 1) {
nct = Option(1)
threads = 1
logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
}
nct = Some(threads)
memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1))
}
def useGvcf() {
......
@@ -36,17 +36,16 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
var singleSampleCalling = config("single_sample_calling", default = true)
var reference: File = config("reference", required = true)
var dbsnp: File = config("dbsnp")
var useAllelesOption: Boolean = config("use_alleles_option", default = false)
val externalGvcfs = config("external_gvcfs_files", default = Nil).asFileList
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
class Library(libId: String) extends AbstractLibrary(libId) {
val mapping = new Mapping(qscript)
mapping.sampleId = sampleId
mapping.libraryId = libraryId
mapping.libId = libId
mapping.outputDir = libDir + "/variantcalling/"
/** Library variantcalling */
@@ -67,12 +66,12 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
if (config("bam_to_fastq", default = false).asBoolean) {
val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libraryId + ".R1.fastq",
libDir + sampleId + "-" + libraryId + ".R2.fastq")
val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libId + ".R1.fastq",
libDir + sampleId + "-" + libId + ".R2.fastq")
samToFastq.isIntermediate = true
qscript.add(samToFastq)
mapping.input_R1 = samToFastq.fastqR1
mapping.input_R2 = samToFastq.fastqR2
mapping.input_R2 = Some(samToFastq.fastqR2)
mapping.init
mapping.biopetScript
addAll(mapping.functions) // Add functions of mapping to current function pool
@@ -83,17 +82,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val header = inputSam.getFileHeader.getReadGroups
for (readGroup <- inputSam.getFileHeader.getReadGroups) {
if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != libraryId) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleId || readGroup.getLibrary != libraryId) readGroupOke = false
if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleId || readGroup.getLibrary != libId) readGroupOke = false
}
inputSam.close
if (!readGroupOke) {
if (config("correct_readgroups", default = false)) {
logger.info("Correcting readgroups, file:" + bamFile)
val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libraryId + ".bam"))
aorrg.RGID = sampleId + "-" + libraryId
aorrg.RGLB = libraryId
val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libId + ".bam"))
aorrg.RGID = sampleId + "-" + libId
aorrg.RGLB = libId
aorrg.RGSM = sampleId
aorrg.isIntermediate = true
qscript.add(aorrg)
@@ -106,7 +105,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
Some(bamFile)
}
} else {
logger.error("Sample: " + sampleId + ": No R1 found for run: " + libraryId)
logger.error("Sample: " + sampleId + ": No R1 found for run: " + libId)
None
}
@@ -127,7 +126,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
gatkVariantcalling.outputDir = sampleDir + "/variantcalling/"
protected def addJobs(): Unit = {
addLibsJobs()
addPerLibJobs()
gatkVariantcalling.inputBams = libraries.map(_._2.mapping.finalBamFile).toList
gatkVariantcalling.preProcesBams = false
if (!singleSampleCalling) {
@@ -150,10 +149,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
}
def biopetScript() {
addSamplesJobs
def biopetScript(): Unit = {
addSamplesJobs()
}
//SampleWide jobs
def addMultiSampleJobs(): Unit = {
val gvcfFiles: List[File] = if (mergeGvcfs && externalGvcfs.size + samples.size > 1) {
val newFile = outputDir + "merged.gvcf.vcf.gz"
add(CombineGVCFs(this, externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile), newFile))
......
@@ -7,7 +7,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import java.io.File
import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles }
import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter, MergeAlleles }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
@@ -32,9 +32,6 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = config("reference", required = true)
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = config("dbsnp")
@Argument(doc = "OutputName", required = false)
var outputName: String = _
@@ -53,7 +50,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
def init() {
if (outputName == null && sampleID != null) outputName = sampleID
else if (outputName == null) outputName = "noname"
else if (outputName == null) outputName = config("output_name", default = "noname")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
@@ -200,6 +197,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz")
cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED
add(cvFinal)
val vcfStats = new VcfStats(this)
vcfStats.input = cvFinal.out
vcfStats.setOutputDir(outputDir + File.separator + "vcfstats")
add(vcfStats)
scriptOutput.finalVcfFile = cvFinal.out
}
}
......
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.gatk.SelectVariants
import nl.lumc.sasc.biopet.extensions.gatk.VariantEval
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc = "Sample vcf file(s)", shortName = "V")
var vcfFiles: List[File] = _
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = config("reference")
@Argument(doc = "Target bed", shortName = "targetBed", required = false)
var targetBed: List[File] = Nil
@Argument(doc = "Samples", shortName = "sample", required = false)
var samples: List[String] = Nil
var vcfFile: File = _
var sampleVcfs: Map[String, File] = Map()
def generalSampleDir = outputDir + "samples/"
def init() {
if (config.contains("target_bed"))
for (bed <- config("target_bed").asList)
targetBed :+= bed.toString
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
def biopetScript() {
vcfFile = if (vcfFiles.size > 1) {
val combineVariants = CombineVariants(this, vcfFiles, outputDir + "merge.vcf")
add(combineVariants)
combineVariants.out
} else vcfFiles.head
for (sample <- samples) {
sampleVcfs += (sample -> new File(generalSampleDir + sample + File.separator + sample + ".vcf"))
val selectVariants = SelectVariants(this, vcfFile, sampleVcfs(sample))
selectVariants.sample_name = Seq(sample)
selectVariants.excludeNonVariants = true
add(selectVariants)
}
val sampleCompareMetrics = new SampleCompareMetrics(this)
sampleCompareMetrics.samples = samples
sampleCompareMetrics.sampleDir = generalSampleDir
sampleCompareMetrics.snpRelFile = outputDir + "compare.snp.rel.tsv"
sampleCompareMetrics.snpAbsFile = outputDir + "compare.snp.abs.tsv"
sampleCompareMetrics.indelRelFile = outputDir + "compare.indel.rel.tsv"
sampleCompareMetrics.indelAbsFile = outputDir + "compare.indel.abs.tsv"
sampleCompareMetrics.totalFile = outputDir + "total.tsv"
for ((sample, sampleVcf) <- sampleVcfs) {
val sampleDir = generalSampleDir + sample + File.separator
for ((compareSample, compareSampleVcf) <- sampleVcfs) {
val variantEval = VariantEval(this,
sampleVcf,
compareSampleVcf,
new File(sampleDir + sample + "-" + compareSample + ".eval.txt"),
Seq("VariantType", "CompRod"),
Seq("CompOverlap")
)
if (targetBed != null) variantEval.L = targetBed
add(variantEval)
sampleCompareMetrics.deps ::= variantEval.out
}
}
add(sampleCompareMetrics)
}
}
object GatkVcfSampleCompare extends PipelineCommand
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package nl.lumc.sasc.biopet.pipelines.gatk
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.R.RScriptExecutor
import org.broadinstitute.gatk.utils.commandline.{ Output, Argument }
import scala.io.Source
import org.broadinstitute.gatk.utils.R.{ RScriptLibrary, RScriptExecutor }
import org.broadinstitute.gatk.utils.io.Resource
import scala.collection.mutable.Map
import scala.math._
class SampleCompareMetrics(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Argument(doc = "Sample Dir", shortName = "sampleDir", required = true)
var sampleDir: String = _
@Argument(doc = "Samples", shortName = "sample", required = true)
var samples: List[String] = Nil
@Argument(doc = "File sufix", shortName = "sufix", required = false)
var fileSufix: String = _
@Output(doc = "snpRelFile", shortName = "snpRelFile", required = true)
var snpRelFile: File = _
@Output(doc = "snpAbsFile", shortName = "snpAbsFile", required = true)
var snpAbsFile: File = _
@Output(doc = "indelRelFile", shortName = "indelRelFile", required = true)
var indelRelFile: File = _
@Output(doc = "indelAbsFile", shortName = "indelAbsFile", required = true)
var indelAbsFile: File = _
@Output(doc = "totalFile", shortName = "totalFile", required = true)
var totalFile: File = _
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
override def commandLine = super.commandLine +
required("-sampleDir", sampleDir) +
repeat("-sample", samples) +
optional("-fileSufix", fileSufix) +
required("-snpRelFile", snpRelFile) +
required("-snpAbsFile", snpAbsFile) +
required("-indelRelFile", indelRelFile) +
required("-indelAbsFile", indelAbsFile) +
required("-totalFile", totalFile)
}
object SampleCompareMetrics {
var sampleDir: String = _
var samples: List[String] = Nil
var fileSufix: String = ".eval.txt"
var snpRelFile: File = _
var snpAbsFile: File = _
var indelRelFile: File = _
var indelAbsFile: File = _
var totalFile: File = _
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
for (t <- 0 until args.size) {
args(t) match {
case "-sample" => samples +:= args(t + 1)
case "-sampleDir" => sampleDir = args(t + 1)
case "-fileSufix" => fileSufix = args(t + 1)
case "-snpRelFile" => snpRelFile = new File(args(t + 1))
case "-snpAbsFile" => snpAbsFile = new File(args(t + 1))
case "-indelRelFile" => indelRelFile = new File(args(t + 1))
case "-indelAbsFile" => indelAbsFile = new File(args(t + 1))
case "-totalFile" => totalFile = new File(args(t + 1))
case _ =>
}
}
if (sampleDir == null) throw new IllegalStateException("No sampleDir, use -sampleDir")
else if (!sampleDir.endsWith("/")) sampleDir += "/"
val regex = """\W+""".r
val snpsOverlap: Map[(String, String), Int] = Map()
val indelsOverlap: Map[(String, String), Int] = Map()
val snpsTotal: Map[String, Int] = Map()
val indelsTotal: Map[String, Int] = Map()
for (sample1 <- samples; sample2 <- samples) {
val reader = Source.fromFile(new File(sampleDir + sample1 + "/" + sample1 + "-" + sample2 + fileSufix))
for (line <- reader.getLines) {
regex.split(line) match {
case Array(_, _, _, varType, all, novel, overlap, rate, _*) => {
varType match {
case "SNP" => {
snpsOverlap += (sample1, sample2) -> overlap.toInt
snpsTotal += sample1 -> all.toInt
}
case "INDEL" => {
indelsOverlap += (sample1, sample2) -> overlap.toInt
indelsTotal += sample1 -> all.toInt
}
case _ =>
}
}
case _ =>
}
}
reader.close()
}
val snpRelWritter = new PrintWriter(snpRelFile)
val snpAbsWritter = new PrintWriter(snpAbsFile)
val indelRelWritter = new PrintWriter(indelRelFile)
val indelAbsWritter = new PrintWriter(indelAbsFile)
val allWritters = List(snpRelWritter, snpAbsWritter, indelRelWritter, indelAbsWritter)
for (writter <- allWritters) writter.println(samples.mkString("\t", "\t", ""))
for (sample1 <- samples) {
for (writter <- allWritters) writter.print(sample1)
for (sample2 <- samples) {
snpRelWritter.print("\t" + (round((snpsOverlap(sample1, sample2).toDouble / snpsTotal(sample1) * 10000.0)) / 10000.0))
snpAbsWritter.print("\t" + snpsOverlap(sample1, sample2))
indelRelWritter.print("\t" + (round((indelsOverlap(sample1, sample2).toDouble / indelsTotal(sample1) * 10000.0)) / 10000.0))
indelAbsWritter.print("\t" + indelsOverlap(sample1, sample2))
}
for (writter <- allWritters) writter.println()
}
for (writter <- allWritters) writter.close()
val totalWritter = new PrintWriter(totalFile)
totalWritter.println("Sample\tSNPs\tIndels")
for (sample <- samples)
totalWritter.println(sample + "\t" + snpsTotal(sample) + "\t" + indelsTotal(sample))
totalWritter.close()
def plot(file: File) {
val executor = new RScriptExecutor
executor.addScript(new Resource("plotHeatmap.R", getClass))
executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png")
executor.exec()
}
plot(snpRelFile)
plot(indelRelFile)