Skip to content
Snippets Groups Projects
Commit c6e11fcf authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Add region extraction

parent 49975278
No related branches found
No related tags found
No related merge requests found
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
* Extension for SelectVariants from GATK
*
* Created by pjvan_thof on 2/26/15.
*/
/**
 * Wrapper that builds the command line for the GATK `SelectVariants` analysis.
 *
 * @param root configurable this extension is constructed from
 */
class SelectVariants(val root: Configurable) extends Gatk {
  val analysisType = "SelectVariants"

  /** Variant files, each passed on the command line with `-V` (or `-V:<name>`). */
  @Input(doc = "", required = true)
  var inputFiles: List[File] = Nil

  /** File passed on the command line with `-o`. */
  @Output(doc = "", required = true)
  var outputFile: File = null

  /** When true, `--excludeNonVariants` is appended to the command line. */
  var excludeNonVariants: Boolean = false

  /** Optional name per input file; a file present here is passed as `-V:<name>`. */
  var inputMap: Map[File, String] = Map()

  /**
   * Appends an input file and records the name used for its `-V:<name>` argument.
   *
   * @param file variant file to add to [[inputFiles]]
   * @param name name stored for `file` in [[inputMap]]
   */
  def addInput(file: File, name: String): Unit = {
    inputFiles :+= file
    inputMap += file -> name
  }

  /**
   * Registers the `.tbi` companion files of `.vcf.gz` files: the one next to the
   * output is added to `outputFiles`, the ones next to the inputs are added to
   * `deps` (which is then de-duplicated).
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()

    // One predicate for inputs and output, so both sides agree on the ".vcf.gz"
    // suffix (the input check previously tested "vcf.gz" without the dot).
    def isCompressedVcf(vcf: File): Boolean = vcf.getName.endsWith(".vcf.gz")
    def indexOf(vcf: File): File = new File(vcf.getAbsolutePath + ".tbi")

    if (isCompressedVcf(outputFile)) outputFiles :+= indexOf(outputFile)
    deps :::= inputFiles.filter(isCompressedVcf).map(indexOf)
    deps = deps.distinct
  }

  /** Command line: the base GATK command, one `-V` per input, `-o`, and the optional flag. */
  override def cmdLine = super.cmdLine +
    inputFiles.map { file =>
      // Named inputs use "-V:<name>", unnamed inputs plain "-V".
      val flag = inputMap.get(file).map("-V:" + _).getOrElse("-V")
      required(flag, file)
    }.mkString +
    required("-o", outputFile) +
    conditional(excludeNonVariants, "--excludeNonVariants")
}
package nl.lumc.sasc.biopet.pipelines.gwastest
import java.io.File
import nl.lumc.sasc.biopet.core.{PipelineCommand, Reference, BiopetQScript}
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.gatk.{SelectVariants, CombineVariants}
import nl.lumc.sasc.biopet.extensions.tools.GensToVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
......@@ -49,6 +51,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
gensToVcf.contig = gen.contig
gensToVcf.samplesFile = phenotypeFile
gensToVcf.outputVcf = new File(outputDirGens, gen.genotypes.getName + ".vcf.gz")
gensToVcf.isIntermediate = true
add(gensToVcf)
cv.inputFiles :+= gensToVcf.outputVcf
}
......@@ -59,7 +62,18 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
// Scatter the reference into bins and create one SelectVariants job per bin.
// The default bin size is written out as 1000000: `10^6` is bitwise XOR in
// Scala and evaluates to 12, not one million.
val snpTests = BedRecordList.fromReference(referenceFasta())
  .scatter(config("bin_size", default = 1000000))
  .allRecords.map { region =>
    //TODO: bcftools view
    // Per-region files: <outputDir>/snptest/<chr>/<chr>-<start+1>-<end>.{bed,vcf.gz}
    val regionDir = new File(outputDir, "snptest" + File.separator + region.chr)
    val regionName = s"${region.chr}-${region.start + 1}-${region.end}"

    // Write the single-region BED file that is handed to SelectVariants as interval.
    val bedFile = new File(regionDir, regionName + ".bed")
    bedFile.getParentFile.mkdirs()
    BedRecordList.fromList(List(region)).writeToFile(bedFile)

    val sv = new SelectVariants(this)
    sv.inputFiles :+= vcfFile
    sv.outputFile = new File(regionDir, regionName + ".vcf.gz")
    sv.intervals :+= bedFile
    sv.isIntermediate = true
    add(sv)
    //TODO: snptest
    Map()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment