BastyGenerateFasta.scala 4.28 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
package nl.lumc.sasc.biopet.tools

import htsjdk.variant.variantcontext.VariantContext
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import scala.collection.JavaConversions._
Peter van 't Hof's avatar
Peter van 't Hof committed
12
import nl.lumc.sasc.biopet.util.VcfUtils._
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

class BastyGenerateFasta(val root: Configurable) extends BiopetJavaCommandLineFunction {
  javaMainClass = getClass.getName

  @Input(doc = "Input vcf file", required = false)
  var inputVcf: File = _

  @Input(doc = "Bam File", required = false)
  var bamFile: File = _

  @Output(doc = "Output fasta, variants only", required = false)
  var outputVariants: File = _

  @Argument(doc = "Output interval list", required = false)
  var snpsOnly: Boolean = config("snps_only", default = false)

  @Argument(doc = "Sample name", required = false)
  var sampleName: String = _

  @Argument(doc = "minAD", required = false)
  var minAD: Int = config("min_ad", default = 8)

  override val defaultVmem = "8G"
  memoryLimit = Option(4.0)
37
  var reference = false
38
39
40
41
42
43
44

  override def commandLine = super.commandLine +
    optional("--inputVcf", inputVcf) +
    optional("--bamFile", bamFile) +
    optional("--outputVariants", outputVariants) +
    conditional(snpsOnly, "--snpsOnly") +
    optional("--sampleName", sampleName) +
45
46
    optional("--minAD", minAD) +
    conditional(reference, "--reference")
47
48
49
50
51
52
53
54
}

object BastyGenerateFasta extends ToolCommand {
  case class Args(inputVcf: File = null,
                  outputVariants: File = null,
                  bamFile: File = null,
                  snpsOnly: Boolean = false,
                  sampleName: String = null,
55
56
                  minAD: Int = 8,
                  reference: Boolean = false) extends AbstractArgs
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

  class OptParser extends AbstractOptParser {
    opt[File]('V', "inputVcf") unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(inputVcf = x)
    }
    opt[File]("bamFile") unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(bamFile = x)
    }
    opt[File]("outputVariants") unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(outputVariants = x)
    }
    opt[Unit]("snpsOnly") unbounded () action { (x, c) =>
      c.copy(snpsOnly = true)
    }
    opt[String]("sampleName") unbounded () action { (x, c) =>
      c.copy(sampleName = x)
    }
    opt[Int]("minAD") unbounded () action { (x, c) =>
      c.copy(minAD = x)
    }
77
78
79
    opt[Unit]("reference") unbounded () action { (x, c) =>
      c.copy(reference = true)
    }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  }

  var commandArgs: Args = _

  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    if (commandArgs.outputVariants != null) writeVariantsOnly()

  }

  def writeVariantsOnly() {
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    if (commandArgs.inputVcf == null) throw new IllegalStateException("To write outputVariants input vcf is required, please use --outputVariants option")
    if (!commandArgs.inputVcf.exists) throw new IllegalStateException("File does not exist: " + commandArgs.inputVcf)
98
99
100
101
102
103
104
105
106
107
108
109
110
    val writer = new PrintWriter(commandArgs.outputVariants)
    writer.println(">" + commandArgs.sampleName)
    val vcfReader = new VCFFileReader(commandArgs.inputVcf, false)
    for (vcfRecord <- vcfReader if (!commandArgs.snpsOnly || vcfRecord.isSNP)) yield {
      writer.print(getMaxAllele(vcfRecord, commandArgs.sampleName))
    }
    writer.println()
    writer.close
    vcfReader.close
  }

  def getMaxAllele(vcfRecord: VariantContext, sample: String): String = {
    val genotype = vcfRecord.getGenotype(sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
111
    val maxSize = getLongestAllele(vcfRecord).getBases.length
112
113

    def fill(bases: String) = bases + (Array.fill[Char](maxSize - bases.size)('N')).mkString
114
    if (commandArgs.reference) return fill(vcfRecord.getReference.getBaseString)
115

116
    if (genotype == null) return fill("")
117
    val AD = genotype.getAD
118
    if (AD == null) return fill("")
119
120
121
122
123
    val maxADid = AD.zipWithIndex.maxBy(_._1)._2
    if (AD(maxADid) < commandArgs.minAD) return fill("")
    return fill(vcfRecord.getAlleles()(maxADid).getBaseString)
  }
}