Skip to content
Snippets Groups Projects
Commit 400d1a15 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added snptest parsing

parent 48edf17b
No related branches found
No related tags found
No related merge requests found
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
*
*/
class SnptestToVcf(val root: Configurable) extends ToolCommandFunction with Reference {
def toolObject = nl.lumc.sasc.biopet.tools.SnptestToVcf
@Input(doc = "input Info file", required = true)
var inputInfo: File = null
@Input(required = true)
var reference: File = _
@Output(required = true)
var outputVcf: File = _
var contig: String = _
override def defaultCoreMemory = 6.0
override def beforeGraph(): Unit = {
super.beforeGraph()
if (reference == null) reference = referenceFasta()
if (contig == null) throw new IllegalStateException
//if (outputVcf.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputVcf.getAbsolutePath + ".tbi")
}
override def cmdLine = super.cmdLine +
required("--inputInfo", inputInfo) +
required("--outputVcf", outputVcf) +
optional("--contig", contig) +
required("--referenceFasta", reference)
}
......@@ -4,7 +4,7 @@ import java.io.File
import java.util
import htsjdk.samtools.reference.{FastaSequenceFile, ReferenceSequenceFileFactory}
import htsjdk.variant.variantcontext.writer.{AsyncVariantContextWriter, VariantContextWriterBuilder}
import htsjdk.variant.variantcontext.writer.{AsyncVariantContextWriter, Options, VariantContextWriterBuilder}
import htsjdk.variant.variantcontext.{Allele, GenotypeBuilder, VariantContextBuilder}
import htsjdk.variant.vcf._
import nl.lumc.sasc.biopet.utils.ToolCommand
......@@ -72,15 +72,38 @@ object SnptestToVcf extends ToolCommand {
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder()
.setOutputFile(cmdArgs.outputVcf)
.setReferenceDictionary(vcfHeader.getSequenceDictionary)
.unsetOption(Options.INDEX_ON_THE_FLY)
.build)
writer.writeHeader(vcfHeader)
val infoKeys = for (key <- headerKeys if key != "rsid" if key != "chromosome" if key != "position"
if key != "alleleA" if key != "alleleB" if key != "alleleA") yield key
var counter = 0
for (line <- lineIt if !line.startsWith("#")) {
val values = line.split(" ")
require(values.size == headerKeys.size, "Number of values are not the same as number of header keys")
val alleles = List(Allele.create(values(headerMap("alleleA")), true), Allele.create(values(headerMap("alleleB"))))
val start = values(headerMap("position")).toLong
val end = alleles.head.length() + start - 1
val rsid = values(headerMap("rsid"))
val builder = (new VariantContextBuilder)
.chr(cmdArgs.contig)
.alleles(alleles)
.start(start)
.stop(end)
.noGenotypes()
val infoBuilder = infoKeys.foldLeft(builder) { case (a,b) => a.attribute("ST_" + b, values(headerMap(b))) }
writer.add(builder.id(rsid).make())
counter += 1
if (counter % 10000 == 0) logger.info(s"$counter lines processed")
}
logger.info(s"$counter lines processed")
writer.close()
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment