/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.extensions.clever

/**
 * Created by wyleung on 4-4-16.
 */
import java.io.{ File, PrintWriter }

import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }

import scala.io.Source

/**
 * Command line function that launches this class' own main (`javaMainClass`
 * is set to the class name) with the `-i`, `-o` and `-s` options.
 *
 * @param parent configurable parent of this function
 */
class CleverFixVCF(val parent: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input Clever VCF")
  var input: File = _

  @Output(doc = "Output fixed VCF")
  var output: File = _

  @Argument(doc = "Samplename")
  var sampleName: String = _

  /** Appends the input, output and sample name options to the inherited command line. */
  override def cmdLine = super.cmdLine +
    required("-i", input) +
    required("-o", output) +
    required("-s", sampleName)
}

/**
 * Tool that rewrites a Clever VCF line by line: the column header line is
 * replaced by extra header lines plus a column header carrying the sample
 * label, other `#` lines are copied, and the REF column of every record is
 * set to `N`.
 */
object CleverFixVCF extends ToolCommand {

  /**
   * Parsed command line arguments.
   *
   * @param inputVCF    Clever VCF to read
   * @param sampleLabel name written in the sample column of the header
   * @param outputVCF   destination of the fixed VCF
   */
  case class Args(inputVCF: File = null, sampleLabel: String = "", outputVCF: File = null) extends AbstractArgs

  /** Command line parser for [[Args]]. */
  class OptParser extends AbstractOptParser {
    // NOTE(review): the value names below are empty strings in this revision;
    // they look like stripped placeholders (e.g. "<file>") -- confirm.
    opt[File]('i', "inputvcf") required () valueName "" action { (x, c) =>
      c.copy(inputVCF = x)
    } text "Please specify the input Clever VCF file"

    opt[String]('s', "samplelabel") valueName "" action { (x, c) =>
      c.copy(sampleLabel = x)
    } text "Sample label is missing"

    // Required: without it main would fail with a NullPointerException when
    // opening the writer instead of reporting a usage error.
    opt[File]('o', "outputvcf") required () valueName "" action { (x, c) =>
      c.copy(outputVCF = x)
    } text "Output path is missing"
  }

  /**
   * Converts one line of the input VCF into the text to write to the output.
   *
   * @param inHeaderLine line read from the input (header line or record), without line terminator
   * @param toCheckFor   exact column header line that must be replaced
   * @param replacement  column header line to write instead of `toCheckFor`
   * @param extraHeader  header lines to insert just before `replacement`
   * @return the text to write for this line, always terminated by a newline
   */
  def replaceHeaderLine(inHeaderLine: String, toCheckFor: String, replacement: String, extraHeader: String): String = {
    if (inHeaderLine == toCheckFor) {
      extraHeader + "\n" + replacement + "\n"
    } else if (inHeaderLine.startsWith("#")) {
      // Any other header line is copied as is
      inHeaderLine + "\n"
    } else {
      // This should be a record. A limit of -1 keeps trailing empty columns,
      // so re-joining does not drop trailing tabs.
      val cols = inHeaderLine.split("\t", -1)
      // The REF field (4th column) is set to N; blank or truncated lines have
      // no REF column and are passed through untouched instead of failing.
      if (cols.length > 3) cols(3) = "N"
      cols.mkString("\t") + "\n"
    }
  }

  // NOTE(review): in this revision the header definitions carry no payload
  // after "=" and are separated by single spaces. VCF meta-information lines
  // are normally one per line with a <ID=...> body -- confirm this literal
  // against the intended header before relying on the output.
  val extraHeader =
    """##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##INFO= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT= ##FORMAT="""

  /** Column header line looked for in the input; its sample column is named `default`. */
  val vcfColHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tdefault"

  /** Column header prefix to which the sample label is appended. */
  val vcfColReplacementHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t"

  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()).getOrElse(throw new IllegalArgumentException)

    val sampleColHeader = vcfColReplacementHeader + commandArgs.sampleLabel

    // Both resources are released even when reading or rewriting a line fails
    val inputVCF = Source.fromFile(commandArgs.inputVCF)
    try {
      val writer = new PrintWriter(commandArgs.outputVCF)
      try {
        inputVCF.getLines().foreach { line =>
          writer.write(replaceHeaderLine(line, vcfColHeader, sampleColHeader, extraHeader))
        }
      } finally {
        writer.close()
      }
    } finally {
      inputVCF.close()
    }
  }
}