Commit dee3612b authored by Peter van 't Hof's avatar Peter van 't Hof

Extract impute to vcf as separated pipeline

parent ece09a76
......@@ -34,6 +34,7 @@ object BiopetExecutableMain extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.gears.GearsSingle,
nl.lumc.sasc.biopet.pipelines.gears.Gears,
nl.lumc.sasc.biopet.pipelines.gwastest.GwasTest,
nl.lumc.sasc.biopet.pipelines.gwastest.impute.Impute2Vcf,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling,
nl.lumc.sasc.biopet.pipelines.basty.Basty,
nl.lumc.sasc.biopet.pipelines.shiva.Shiva,
......
......@@ -15,97 +15,35 @@
package nl.lumc.sasc.biopet.pipelines.gwastest
import java.io.File
import java.util
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Snptest
import nl.lumc.sasc.biopet.extensions.gatk.{ CatVariants, SelectVariants }
import nl.lumc.sasc.biopet.extensions.tools.{ GensToVcf, SnptestToVcf }
import nl.lumc.sasc.biopet.pipelines.gwastest.impute.ImputeOutput
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.extensions.tools.SnptestToVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/**
* Created by pjvanthof on 16/03/16.
*/
class GwasTest(val root: Configurable) extends QScript with BiopetQScript with Reference {
def this() = this(null)
import GwasTest._
val inputVcf: Option[File] = config("input_vcf")
val inputVcf: File = config("input_vcf")
val phenotypeFile: File = config("phenotype_file")
val specsFile: Option[File] = config("imute_specs_file")
val inputGens: List[GensInput] = if (inputVcf.isDefined) List[GensInput]()
else config("input_gens", default = Nil).asList.map {
case value: Map[String, Any] =>
GensInput(new File(value("genotypes").toString),
value.get("info").map(x => new File(x.toString)),
value("contig").toString)
case value: util.LinkedHashMap[String, _] =>
GensInput(new File(value.get("genotypes").toString),
value.toMap.get("info").map(x => new File(x.toString)),
value.get("contig").toString)
case _ => throw new IllegalArgumentException
} ++ (specsFile match {
case Some(file) => imputeSpecsToGensInput(file, config("validate_specs", default = true))
case _ => Nil
})
override def dictRequired = true
override def defaults = Map("snptest" -> Map("genotype_field" -> "GP"))
/** Init for pipeline */
def init(): Unit = {
inputGens.foreach { g =>
val referenceDict = new FastaSequenceFile(referenceFasta(), true).getSequenceDictionary
if (referenceDict.getSequenceIndex(g.contig) == -1)
Logging.addError(s"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}")
}
}
/** Pipeline itself */
def biopetScript(): Unit = {
val (vcfFile, chrVcfFiles): (File, Map[String, File]) = inputVcf.map((_, Map[String, File]())).getOrElse {
require(inputGens.nonEmpty, "No vcf file or gens files defined in config")
val outputDirGens = new File(outputDir, "gens_to_vcf")
val cvTotal = new CatVariants(this)
cvTotal.assumeSorted = true
cvTotal.outputFile = new File(outputDirGens, "merge.gens.vcf.gz")
val chrGens = inputGens.groupBy(_.contig).map {
case (contig, gens) =>
val cvChr = new CatVariants(this)
cvChr.assumeSorted = true
//cvChr.isIntermediate = true
cvChr.outputFile = new File(outputDirGens, s"$contig.merge.gens.vcf.gz")
gens.zipWithIndex.foreach { gen =>
val gensToVcf = new GensToVcf(this)
gensToVcf.inputGens = gen._1.genotypes
gensToVcf.inputInfo = gen._1.info
gensToVcf.contig = gen._1.contig
gensToVcf.samplesFile = phenotypeFile
gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz")
gensToVcf.isIntermediate = true
add(gensToVcf)
cvChr.variant :+= gensToVcf.outputVcf
}
add(cvChr)
cvTotal.variant :+= cvChr.outputFile
contig -> cvChr.outputFile
}
add(cvTotal)
(cvTotal.outputFile, chrGens)
}
val snpTests = BedRecordList.fromReference(referenceFasta())
.scatter(config("bin_size", default = 1000000))
.allRecords.map { region =>
......@@ -119,7 +57,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
bedFile.deleteOnExit()
val sv = new SelectVariants(this)
sv.variant = chrVcfFiles.getOrElse(region.chr, vcfFile)
sv.variant = inputVcf
sv.out = new File(regionDir, s"$name.vcf.gz")
sv.intervals :+= bedFile
sv.isIntermediate = true
......@@ -147,11 +85,4 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
}
}
object GwasTest extends PipelineCommand {
case class GensInput(genotypes: File, info: Option[File], contig: String)
def imputeSpecsToGensInput(specsFile: File, validate: Boolean = true): List[GensInput] = {
ImputeOutput.readSpecsFile(specsFile, validate)
.map(x => GensInput(x.gens, Some(x.gensInfo), x.chromosome))
}
}
\ No newline at end of file
object GwasTest extends PipelineCommand
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gwastest.impute
import java.io.File
import java.util
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.gatk.CatVariants
import nl.lumc.sasc.biopet.extensions.tools.GensToVcf
import nl.lumc.sasc.biopet.pipelines.gwastest.impute.Impute2Vcf.GensInput
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
/**
* Created by pjvan_thof on 27-5-16.
*/
class Impute2Vcf(val root: Configurable) extends QScript with BiopetQScript with Reference {
def this() = this(null)
val specsFile: Option[File] = config("imute_specs_file")
val phenotypeFile: File = config("phenotype_file")
val inputGens: List[GensInput] = config("input_gens", default = Nil).asList.map {
case value: Map[String, Any] =>
GensInput(new File(value("genotypes").toString),
value.get("info").map(x => new File(x.toString)),
value("contig").toString)
case value: util.LinkedHashMap[String, _] =>
GensInput(new File(value.get("genotypes").toString),
value.toMap.get("info").map(x => new File(x.toString)),
value.get("contig").toString)
case _ => throw new IllegalArgumentException
} ++ (specsFile match {
case Some(file) => Impute2Vcf.imputeSpecsToGensInput(file, config("validate_specs", default = true))
case _ => Nil
})
/** Init for pipeline */
def init(): Unit = {
inputGens.foreach { g =>
val referenceDict = new FastaSequenceFile(referenceFasta(), true).getSequenceDictionary
if (referenceDict.getSequenceIndex(g.contig) == -1)
Logging.addError(s"Contig '${g.contig}' does not exist on reference: ${referenceFasta()}")
}
}
/** Pipeline itself */
def biopetScript(): Unit = {
require(inputGens.nonEmpty, "No vcf file or gens files defined in config")
val cvTotal = new CatVariants(this)
cvTotal.assumeSorted = true
cvTotal.outputFile = new File(outputDir, "merge.gens.vcf.gz")
val chrGens = inputGens.groupBy(_.contig).map {
case (contig, gens) =>
val cvChr = new CatVariants(this)
cvChr.assumeSorted = true
//cvChr.isIntermediate = true
cvChr.outputFile = new File(outputDir, s"$contig.merge.gens.vcf.gz")
gens.zipWithIndex.foreach { gen =>
val gensToVcf = new GensToVcf(this)
gensToVcf.inputGens = gen._1.genotypes
gensToVcf.inputInfo = gen._1.info
gensToVcf.contig = gen._1.contig
gensToVcf.samplesFile = phenotypeFile
gensToVcf.outputVcf = new File(outputDir, gen._1.genotypes.getName + s".${gen._2}.vcf.gz")
gensToVcf.isIntermediate = true
add(gensToVcf)
cvChr.variant :+= gensToVcf.outputVcf
}
add(cvChr)
cvTotal.variant :+= cvChr.outputFile
contig -> cvChr.outputFile
}
add(cvTotal)
}
}
object Impute2Vcf extends PipelineCommand {
case class GensInput(genotypes: File, info: Option[File], contig: String)
def imputeSpecsToGensInput(specsFile: File, validate: Boolean = true): List[GensInput] = {
ImputeOutput.readSpecsFile(specsFile, validate)
.map(x => GensInput(x.gens, Some(x.gensInfo), x.chromosome))
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment