Commit 827de641 authored by bow's avatar bow
Browse files

Merge branch 'feature-basty_generate_fasta' into 'develop'

Feature basty generate fasta

for #31

See merge request !51
parents 109ff2b3 cbcd5dd0
......@@ -37,6 +37,7 @@ object BiopetExecutable extends Logging {
nl.lumc.sasc.biopet.tools.SageCountFastq,
nl.lumc.sasc.biopet.tools.SageCreateLibrary,
nl.lumc.sasc.biopet.tools.SageCreateTagCounts,
nl.lumc.sasc.biopet.tools.BastyGenerateFasta,
nl.lumc.sasc.biopet.tools.MergeAlleles,
nl.lumc.sasc.biopet.tools.SamplesTsvToJson)
)
......
......@@ -9,6 +9,8 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
javaGCHeapFreeLimit = config("java_gc_heap_freelimit")
javaGCTimeLimit = config("java_gc_timelimit")
override def javaOpts = super.javaOpts + optional("-Dscala.concurrent.context.numThreads=", threads, spaceSeparated = false, escape = false)
override def afterGraph {
memoryLimit = config("memory_limit")
}
......
......@@ -7,9 +7,9 @@ import java.io.File
class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
override val defaultThreads = 4
override val defaultThreads = 1
override def versionCommand = executable + " -v"
override val versionRegex = """.*version \w* .*""".r
override val versionRegex = """.*version ([\w\.]*) .*""".r
@Input(doc = "Input phy/fasta file", required = true)
var input: File = _
......@@ -32,8 +32,8 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Name of output files", required = true)
var f: String = "d"
@Argument(doc = "Output directory", required = false)
var w: String = jobLocalDir.getAbsolutePath
@Argument(doc = "Output directory", required = true)
var w: String = _
@Input(required = false)
var t: File = _
......@@ -44,18 +44,35 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
@Output(doc = "Output files", required = false)
private var out: List[File] = Nil
executable = config("exe", default = "raxmlHPC")
var executableNonThreads: String = config("exe", default = "raxmlHPC")
var executableThreads: String = config("exe_pthreads")
override def afterGraph {
if (threads == 0) threads = getThreads(defaultThreads)
executable = if (threads > 1 && executableThreads != null) executableThreads else executableNonThreads
super.afterGraph
out +:= getInfoFile
f match {
case "d" if b.isEmpty => out +:= getBestTree
case "d" if b.isDefined => out +:= getBootstrap
case "d" if b.isEmpty => {
out +:= getBestTreeFile
for (t <- 0 until N.getOrElse(1)) {
out +:= new File(w + File.separator + "RAxML_log." + n + ".RUN." + t)
out +:= new File(w + File.separator + "RAxML_parsimonyTree." + n + ".RUN." + t)
out +:= new File(w + File.separator + "RAxML_result." + n + ".RUN." + t)
}
}
case "d" if b.isDefined => out +:= getBootstrapFile
case "b" => {
out +:= new File(w + File.separator + "RAxML_bipartitionsBranchLabels." + n)
out +:= new File(w + File.separator + "RAxML_bipartitions." + n)
}
case _ =>
}
}
def getBestTree: File = new File(w + File.separator + "RAxML_bestTree." + n)
def getBootstrap: File = new File(w + File.separator + "RAxML_bootstrap." + n)
def getBestTreeFile: File = new File(w + File.separator + "RAxML_bestTree." + n)
def getBootstrapFile: File = new File(w + File.separator + "RAxML_bootstrap." + n)
def getInfoFile: File = new File(w + File.separator + "RAxML_info." + n)
def cmdLine = required(executable) +
required("-m", m) +
......@@ -63,9 +80,10 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
optional("-p", p) +
optional("-b", b) +
optional("-N", N) +
optional("-n", n) +
optional("-w", w) +
optional("-f", f) +
optional("-t", t) +
optional("-z", z) +
(if (threads > 1) required("-T", threads) else "")
}
\ No newline at end of file
required("-T", threads)
}
package nl.lumc.sasc.biopet.pipelines.basty
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.Raxml
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import org.broadinstitute.gatk.queue.QScript
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
......@@ -12,15 +16,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
class LibraryOutput extends AbstractLibraryOutput {
}
case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
class SampleOutput extends AbstractSampleOutput {
var output: FastaOutput = _
var outputSnps: FastaOutput = _
}
defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true)
defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true, "joint_variantcalling" -> true)
var gatkPipeline: GatkPipeline = _
var gatkPipeline: GatkPipeline = new GatkPipeline(this)
gatkPipeline.jointVariantcalling = true
def init() {
gatkPipeline = new GatkPipeline(this)
gatkPipeline.outputDir = outputDir
gatkPipeline.init
}
......@@ -29,17 +36,82 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
gatkPipeline.biopetScript
addAll(gatkPipeline.functions)
val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)
runSamplesJobs()
val catVariants = Cat(this, refVariants.variants :: samplesOutput.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
add(catVariants)
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samplesOutput.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
add(catVariantsSnps)
val catConsensus = Cat(this, refVariants.consensus :: samplesOutput.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta")
add(catConsensus)
val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samplesOutput.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta")
add(catConsensusSnps)
val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samplesOutput.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta")
add(catConsensusVariants)
val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samplesOutput.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta")
add(catConsensusVariantsSnps)
val seed: Int = config("seed", default = 12345)
def addRaxml(input: File, outputDir: String, outputName: String) {
val raxmlMl = new Raxml(this)
raxmlMl.input = input
raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlMl.p = seed
raxmlMl.n = outputName + "_ml"
raxmlMl.w = outputDir
raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
add(raxmlMl)
val r = new scala.util.Random(seed)
val numBoot = config("boot_runs", default = 100, submodule = "raxml").getInt
val bootList = for (t <- 0 until numBoot) yield {
val raxmlBoot = new Raxml(this)
raxmlBoot.threads = 1
raxmlBoot.input = input
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = seed
raxmlBoot.b = math.abs(r.nextInt)
raxmlBoot.w = outputDir
raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
raxmlBoot.getBootstrapFile
}
val cat = Cat(this, bootList.toList, outputDir + "/boot_list")
add(cat)
val raxmlBi = new Raxml(this)
raxmlBi.input = input
raxmlBi.t = raxmlMl.getBestTreeFile
raxmlBi.z = cat.output
raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBi.p = seed
raxmlBi.f = "b"
raxmlBi.n = outputName + "_bi"
raxmlBi.w = outputDir
add(raxmlBi)
}
addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps")
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
val sampleID: String = sampleConfig("ID").toString
val sampleDir = globalSampleDir + sampleID
val sampleDir = globalSampleDir + sampleID + "/"
sampleOutput.libraries = runLibraryJobs(sampleConfig)
sampleOutput.output = addGenerateFasta(sampleID, sampleDir)
sampleOutput.outputSnps = addGenerateFasta(sampleID, sampleDir, snpsOnly = true)
return sampleOutput
}
......@@ -53,6 +125,23 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
return libraryOutput
}
def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,
snpsOnly: Boolean = false): FastaOutput = {
val bastyGenerateFasta = new BastyGenerateFasta(this)
bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
bastyGenerateFasta.inputVcf = gatkPipeline.multisampleVariantcalling.scriptOutput.finalVcfFile
if (gatkPipeline.samplesOutput.contains(sampleName)) {
bastyGenerateFasta.bamFile = gatkPipeline.samplesOutput(sampleName).variantcalling.bamFiles.head
}
bastyGenerateFasta.outputVariants = outputDir + bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.outputConsensus = outputDir + bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.outputConsensusVariants = outputDir + bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.sampleName = sampleName
bastyGenerateFasta.snpsOnly = snpsOnly
add(bastyGenerateFasta)
return FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
}
}
object Basty extends PipelineCommand
......@@ -59,6 +59,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
else if (!outputDir.endsWith("/")) outputDir += "/"
}
val multisampleVariantcalling = new GatkVariantcalling(this) {
override protected lazy val configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
}
def biopetScript() {
if (onlySample.isEmpty) {
runSamplesJobs
......@@ -92,23 +97,19 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz")
add(cvRaw)
val gatkVariantcalling = new GatkVariantcalling(this) {
override protected lazy val configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
}
gatkVariantcalling.preProcesBams = Some(false)
gatkVariantcalling.doublePreProces = Some(false)
gatkVariantcalling.inputBams = allBamfiles.toList
gatkVariantcalling.rawVcfInput = cvRaw.out
gatkVariantcalling.outputDir = outputDir + "variantcalling"
gatkVariantcalling.outputName = "multisample"
gatkVariantcalling.init
gatkVariantcalling.biopetScript
addAll(gatkVariantcalling.functions)
multisampleVariantcalling.preProcesBams = false
multisampleVariantcalling.doublePreProces = false
multisampleVariantcalling.inputBams = allBamfiles.toList
multisampleVariantcalling.rawVcfInput = cvRaw.out
multisampleVariantcalling.outputDir = outputDir + "variantcalling"
multisampleVariantcalling.outputName = "multisample"
multisampleVariantcalling.init
multisampleVariantcalling.biopetScript
addAll(multisampleVariantcalling.functions)
if (config("inputtype", default = "dna").getString != "rna" && config("recalibration", default = false).getBoolean) {
val recalibration = new GatkVariantRecalibration(this)
recalibration.inputVcf = gatkVariantcalling.scriptOutput.finalVcfFile
recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile
recalibration.bamFiles = finalBamFiles
recalibration.outputDir = outputDir + "recalibration/"
recalibration.init
......@@ -134,10 +135,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val gatkVariantcalling = new GatkVariantcalling(this)
gatkVariantcalling.inputBams = libraryBamfiles
gatkVariantcalling.outputDir = sampleDir + "/variantcalling/"
gatkVariantcalling.preProcesBams = Some(false)
gatkVariantcalling.preProcesBams = false
if (!singleSampleCalling) {
gatkVariantcalling.useHaplotypecaller = Some(false)
gatkVariantcalling.useUnifiedGenotyper = Some(false)
gatkVariantcalling.useHaplotypecaller = false
gatkVariantcalling.useUnifiedGenotyper = false
}
gatkVariantcalling.sampleID = sampleID
gatkVariantcalling.init
......
......@@ -8,7 +8,7 @@ import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import scala.collection.SortedMap
import scala.language.reflectiveCalls
......@@ -41,25 +41,32 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true)
var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false)
var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false)
var useIndelRealigner: Boolean = config("use_indel_realign", default = true)
var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true)
def init() {
if (outputName == null && sampleID != null) outputName = sampleID
else if (outputName == null) outputName = "noname"
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
val baseRecalibrator = new BaseRecalibrator(this)
if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) {
logger.warn("No Known site found, skipping base recalibration")
useBaseRecalibration = false
}
}
private def doublePreProces(files: List[File]): List[File] = {
if (files.size == 1) return files
if (files.isEmpty) throw new IllegalStateException("Files can't be empty")
if (!doublePreProces.get) return files
val markDub = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
if (dbsnp != null) {
add(markDub, isIntermediate = true)
List(addIndelRealign(markDub.output, outputDir, isIntermediate = false))
val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
add(markDup, isIntermediate = useIndelRealigner)
if (useIndelRealigner) {
List(addIndelRealign(markDup.output, outputDir, isIntermediate = false))
} else {
add(markDub, isIntermediate = true)
List(markDub.output)
List(markDup.output)
}
}
......@@ -67,8 +74,14 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
scriptOutput.bamFiles = if (preProcesBams.get) {
var bamFiles: List[File] = Nil
for (inputBam <- inputBams) {
var bamFile = addIndelRealign(inputBam, outputDir)
bamFiles :+= addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1)
var bamFile = inputBam
if (useIndelRealigner) {
bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration)
}
if (useBaseRecalibration) {
bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1)
}
bamFiles :+= bamFile
}
doublePreProces(bamFiles)
} else if (inputBams.size > 1 && doublePreProces.get) {
......@@ -188,7 +201,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = {
val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) //with gatkArguments {
val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))
if (baseRecalibrator.knownSites.isEmpty) {
logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
......@@ -196,7 +209,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
add(baseRecalibrator)
val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) //with gatkArguments {
val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
baseRecalibratorAfter.BQSR = baseRecalibrator.o
add(baseRecalibratorAfter)
......
package nl.lumc.sasc.biopet.tools
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import htsjdk.variant.variantcontext.VariantContext
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.JavaConversions._
import nl.lumc.sasc.biopet.utils.VcfUtils._
import scala.collection.mutable.ListBuffer
class BastyGenerateFasta(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Input(doc = "Input vcf file", required = false)
var inputVcf: File = _
@Input(doc = "Bam File", required = false)
var bamFile: File = _
@Input(doc = "reference", required = false)
var reference: File = config("reference")
@Output(doc = "Output fasta, variants only", required = false)
var outputVariants: File = _
@Output(doc = "Output fasta, variants only", required = false)
var outputConsensus: File = _
@Output(doc = "Output fasta, variants only", required = false)
var outputConsensusVariants: File = _
var snpsOnly: Boolean = config("snps_only", default = false)
var sampleName: String = _
var minAD: Int = config("min_ad", default = 8)
var minDepth: Int = config("min_depth", default = 8)
var outputName: String = _
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
override def commandLine = super.commandLine +
optional("--inputVcf", inputVcf) +
optional("--bamFile", bamFile) +
optional("--outputVariants", outputVariants) +
optional("--outputConsensus", outputConsensus) +
optional("--outputConsensusVariants", outputConsensusVariants) +
conditional(snpsOnly, "--snpsOnly") +
optional("--sampleName", sampleName) +
required("--outputName", outputName) +
optional("--minAD", minAD) +
optional("--minDepth", minDepth) +
optional("--reference", reference)
}
object BastyGenerateFasta extends ToolCommand {
case class Args(inputVcf: File = null,
outputVariants: File = null,
outputConsensus: File = null,
outputConsensusVariants: File = null,
bamFile: File = null,
snpsOnly: Boolean = false,
sampleName: String = null,
outputName: String = null,
minAD: Int = 8,
minDepth: Int = 8,
reference: File = null) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('V', "inputVcf") unbounded () valueName ("<file>") action { (x, c) =>
c.copy(inputVcf = x)
} text ("vcf file, needed for outputVariants and outputConsensusVariants") validate { x =>
if (x.exists) success else failure("File does not exist: " + x)
}
opt[File]("bamFile") unbounded () valueName ("<file>") action { (x, c) =>
c.copy(bamFile = x)
} text ("bam file, needed for outputConsensus and outputConsensusVariants") validate { x =>
if (x.exists) success else failure("File does not exist: " + x)
}
opt[File]("outputVariants") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) =>
c.copy(outputVariants = x)
} text ("fasta with only variants from vcf file")
opt[File]("outputConsensus") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) =>
c.copy(outputConsensus = x)
} text ("Consensus fasta from bam, always reference bases else 'N'")
opt[File]("outputConsensusVariants") maxOccurs (1) unbounded () valueName ("<file>") action { (x, c) =>
c.copy(outputConsensusVariants = x)
} text ("Consensus fasta from bam with variants from vcf file, always reference bases else 'N'")
opt[Unit]("snpsOnly") unbounded () action { (x, c) =>
c.copy(snpsOnly = true)
} text ("Only use snps from vcf file")
opt[String]("sampleName") unbounded () action { (x, c) =>
c.copy(sampleName = x)
} text ("Sample name in vcf file")
opt[String]("outputName") required () unbounded () action { (x, c) =>
c.copy(outputName = x)
} text ("Output name in fasta file header")
opt[Int]("minAD") unbounded () action { (x, c) =>
c.copy(minAD = x)
} text ("min AD value in vcf file for sample")
opt[Int]("minDepth") unbounded () action { (x, c) =>
c.copy(minDepth = x)
} text ("min detp in bam file")
opt[File]("reference") unbounded () action { (x, c) =>
c.copy(reference = x)
} text ("Indexed reference fasta file") validate { x =>
if (x.exists) success else failure("File does not exist: " + x)
}
checkConfig { c =>
{
val err: ListBuffer[String] = ListBuffer()
if (c.outputConsensus != null || c.outputConsensusVariants != null) {
if (c.reference == null)
err.add("No reference suplied")
else {
val index = new File(c.reference.getAbsolutePath + ".fai")
if (!index.exists) err.add("Reference does not have index")
}
if (c.outputConsensusVariants != null && c.inputVcf == null)
err.add("To write outputVariants input vcf is required, please use --inputVcf option")
if (c.sampleName != null && c.bamFile == null)
err.add("To write Consensus input bam file is required, please use --bamFile option")
}
if (c.outputVariants != null && c.inputVcf == null)
err.add("To write outputVariants input vcf is required, please use --inputVcf option")
if (c.outputVariants == null && c.outputConsensus == null && c.outputConsensusVariants == null)
err.add("No output file selected")
if (err.isEmpty) success else failure(err.mkString("", "\nError: ", "\n"))
}
}
}
protected var cmdArgs: Args = _
private val chunkSize = 100000
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
if (cmdArgs.outputVariants != null) writeVariantsOnly()
if (cmdArgs.outputConsensus != null || cmdArgs.outputConsensusVariants != null) writeConsensus()
}
protected def writeConsensus() {
val referenceFile = new IndexedFastaSequenceFile(cmdArgs.reference)
val referenceDict = referenceFile.getSequenceDictionary
for (chr <- referenceDict.getSequences) {
val chunks = (for (chunk <- (0 to (chr.getSequenceLength / chunkSize)).par) yield {
val chrName = chr.getSequenceName
val begin = chunk * chunkSize + 1
val end = {
val e = (chunk + 1) * chunkSize
if (e > chr.getSequenceLength) chr.getSequenceLength else e
}
logger.info("begin on: chrName: " + chrName + " begin: " + begin + " end: " + end)
val referenceSequence = referenceFile.getSubsequenceAt(chrName, begin, end)
val variants: Map[(Int, Int), VariantContext] = if (cmdArgs.inputVcf != null) {
val reader = new VCFFileReader(cmdArgs.inputVcf, true)
(for (variant <- reader.query(chrName, begin, end) if (!cmdArgs.snpsOnly || variant.isSNP)) yield {
(variant.getStart, variant.getEnd) -> variant
}).toMap
} else Map()