Skip to content
Snippets Groups Projects
Commit ea03c880 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added spec validation

parent d3851377
No related branches found
No related tags found
No related merge requests found
......@@ -4,12 +4,15 @@ import java.io.File
import java.util
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, BiopetQScript }
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.gatk.{ SelectVariants, CombineVariants }
import nl.lumc.sasc.biopet.extensions.tools.GensToVcf
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.pipelines.gwastest.impute.ImputeOutput
import scala.collection.JavaConversions._
/**
......@@ -18,13 +21,15 @@ import scala.collection.JavaConversions._
class GwasTest(val root: Configurable) extends QScript with BiopetQScript with Reference {
def this() = this(null)
import GwasTest._
val inputVcf: Option[File] = config("input_vcf")
val phenotypeFile: File = config("phenotype_file")
case class GensInput(genotypes: File, info: Option[File], contig: String)
val specsFile: Option[File] = config("imute_specs_file")
val inputBlaGens: List[GensInput] = if (inputVcf.isDefined) List[GensInput]()
val inputGens: List[GensInput] = if (inputVcf.isDefined) List[GensInput]()
else config("input_gens", default = Nil).asList.map(x => x match {
case value: Map[String, Any] =>
GensInput(new File(value("genotypes").toString),
......@@ -35,6 +40,9 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
value.toMap.get("info").map(x => new File(x.toString)),
value.get("contig").toString)
case _ => throw new IllegalArgumentException
}) ++ (specsFile match {
case Some(file) => imputeSpecsToGensInput(file, config("validate_specs", default = true))
case _ => Nil
})
/** Init for pipeline */
......@@ -44,18 +52,19 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
/** Pipeline itself */
def biopetScript(): Unit = {
val vcfFile: File = inputVcf.getOrElse {
require(inputBlaGens.nonEmpty, "No vcf file or gens files defined in config")
require(inputGens.nonEmpty, "No vcf file or gens files defined in config")
val outputDirGens = new File(outputDir, "gens_to_vcf")
val cv = new CombineVariants(this)
cv.outputFile = new File(outputDirGens, "merge.gens.vcf.gz")
cv.setKey = "null"
inputBlaGens.foreach { gen =>
cv.genotypeMergeOptions = Some("UNSORTED") //TODO: should be a default
inputGens.zipWithIndex.foreach { gen =>
val gensToVcf = new GensToVcf(this)
gensToVcf.inputGens = gen.genotypes
gensToVcf.inputInfo = gen.info
gensToVcf.contig = gen.contig
gensToVcf.inputGens = gen._1.genotypes
gensToVcf.inputInfo = gen._1.info
gensToVcf.contig = gen._1.contig
gensToVcf.samplesFile = phenotypeFile
gensToVcf.outputVcf = new File(outputDirGens, gen.genotypes.getName + ".vcf.gz")
gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz")
gensToVcf.isIntermediate = true
add(gensToVcf)
cv.inputFiles :+= gensToVcf.outputVcf
......@@ -81,9 +90,20 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
add(sv)
//TODO: snptest
(region -> "")
region -> sv.outputFile
}
val cat = new Cat(this)
cat.input = snpTests.map(_._2).toList
cat.output = new File(outputDir, "merge.txt")
add(cat)
}
}
object GwasTest extends PipelineCommand
\ No newline at end of file
object GwasTest extends PipelineCommand {
case class GensInput(genotypes: File, info: Option[File], contig: String)
def imputeSpecsToGensInput(specsFile: File, validate: Boolean = true): List[GensInput] = {
ImputeOutput.readSpecsFile(specsFile, validate)
.map(x => GensInput(x.gens, Some(x.gensInfo), x.chromosome))
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gwastest.impute
import java.io.File
import nl.lumc.sasc.biopet.utils.Logging
import Spec.SpecDecoderOps
import scala.io.Source
/**
* Created by pjvan_thof on 3/25/16.
*/
object ImputeOutput {
case class Chunk(chromosome: String, summary: File, warnings: File,
gens: File, gensInfo: File, gensInfoBySample: File)
def expandChunk(chromosome: String, basename: String) = Chunk(
chromosome,
new File(basename + ".gens_summary"),
new File(basename + ".gens_warnings"),
new File(basename + ".gens"),
new File(basename + ".gens_info"),
new File(basename + ".gens_info_by_sample")
)
def readSpecsFile(specsFile: File, validate: Boolean = true): List[Chunk] = {
val content = Source.fromFile(specsFile).mkString
val chunks = content.decode[List[Spec.ImputeOutput]]
.map(x => expandChunk(x.chromosome, specsFile.getParent + File.separator + x.name.split(File.separator).last))
chunks.flatMap(validateChunk(_, validate))
}
val ASSESSMENT_HEADER = "-{32}\\n Imputation accuracy assessment \\n-{32}".r
val TOO_FEW_SNPS = "There are no SNPs in the imputation interval, so " +
"there is nothing for IMPUTE2 to analyze; the program will quit now."
def validateChunk(chunk: Chunk, raiseErrors: Boolean = true): Option[Chunk] = {
def addError(msg: String) = if (raiseErrors) Logging.addError(msg) else Logging.logger.warn(msg)
if (!chunk.summary.exists()) {
Logging.addError(s"Summary file '${chunk.summary}' does not exist, please check Impute output")
None
} else if (!chunk.warnings.exists()) {
addError(s"Warnings file '${chunk.warnings}' does not exist, please check Impute output")
None
} else if (chunk.summary.canRead()) {
val summaryReader = Source.fromFile(chunk.summary)
val summaryLines = summaryReader.getLines().toList
summaryReader.close()
if (summaryLines.contains(TOO_FEW_SNPS)) None
else if (summaryLines.exists(ASSESSMENT_HEADER.findFirstIn(_).isDefined)) None
else if (!chunk.gens.exists()) {
addError(s"Gens file '${chunk.gens}' does not exist, please check Impute output")
None
} else if (!chunk.gensInfo.exists()) {
addError(s"GensInfo file '${chunk.gensInfo}' does not exist, please check Impute output")
None
} else if (!chunk.gensInfoBySample.exists()) {
addError(s"GensInfoBySample file '${chunk.gensInfoBySample}' does not exist, please check Impute output")
None
} else {
Some(chunk)
}
} else None
}
}
package nl.lumc.sasc.biopet.pipelines.gwastest.impute
import scala.util.parsing.combinator.JavaTokenParsers
/**
* .spec file parser combinator.
*
* TODO: clean up.
*
* @author Matthijs Moed <M.H.Moed@lumc.nl>
*/
object Spec extends JavaTokenParsers {
def obj: Parser[Map[String, Any]] =
"{" ~> rep(member) <~ "}" ^^ (Map() ++ _)
def arr: Parser[List[Any]] = "(" ~> repsep(value, ",") <~ ")"
def key: Parser[String] = """([\w+])*+""".r
def member: Parser[(String, Any)] = key ~ "=" ~ value <~ literal(";") ^^ {
case name ~ "=" ~ value => (name, value)
}
// This is a hack to get around the fact that JavaTokenParsers'
// stringLiterals do not have their double quotes removed. They're stripped
// here, without any error checking.
// TODO: implement as token parser
def string: Parser[String] = stringLiteral ^^ {
case s => s.substring(1, s.length - 1)
}
def value: Parser[Any] = (
obj
| arr
| string
| wholeNumber ^^ (_.toInt)
| floatingPointNumber ^^ (_.toDouble)
)
//
trait SpecDecoder[T] {
def decode(obj: Map[String, Any]): T
}
case class ImputeOutput(chromosome: String, name: String, orig: String)
class ImputeOutputMappingDecoder extends SpecDecoder[List[ImputeOutput]] {
override def decode(obj: Map[String, Any]): List[ImputeOutput] = {
val dicts = obj("files").asInstanceOf[List[Map[String, String]]]
dicts.map(d => ImputeOutput(d("chromosome"), d("name"), d("orig")))
}
}
implicit val imputeOutputMappingDecoder = new ImputeOutputMappingDecoder
implicit class SpecDecoderOps(string: String) {
implicit def decode[T](implicit decoder: SpecDecoder[T]): T = {
decoder.decode(Spec.parseAll(Spec.obj, string).get)
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment