VcfUtils.scala 3.22 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.utils
Peter van 't Hof's avatar
Peter van 't Hof committed
17

Sander Bollen's avatar
Sander Bollen committed
18
19
import java.util

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import htsjdk.variant.variantcontext.VariantContext
Sander Bollen's avatar
Sander Bollen committed
21
import htsjdk.variant.vcf.{ VCFHeader, VCFFilterHeaderLine }
Peter van 't Hof's avatar
Peter van 't Hof committed
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
24
25
26
27
28
29
30
import scala.collection.JavaConversions._

/** Utility object for general vcf file/records functions. */
object VcfUtils {
  /**
   * Return longest allele of VariantContext.
   *
   * When several alleles share the maximum number of bases, the one listed
   * first in the record is returned.
   *
   * @param vcfRecord record to check
   * @return allele with most nucleotides
   * @throws UnsupportedOperationException if the record holds no alleles
   */
  def getLongestAllele(vcfRecord: VariantContext): htsjdk.variant.variantcontext.Allele = {
    // The java allele list is viewed as a scala collection through the
    // file-level JavaConversions import; maxBy keeps the first maximum.
    vcfRecord.getAlleles.maxBy(_.getBases.length)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
38

Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
41
42
43
44
45
  /**
   * Extend an allele to a new length by padding it on the right.
   *
   * An allele that already has `newSize` characters or more is returned unchanged.
   *
   * @param bases Allele
   * @param newSize New size of allele
   * @param fillWith Char to fill gap
   * @return `bases` followed by as many `fillWith` characters as needed to reach `newSize`
   */
  def fillAllele(bases: String, newSize: Int, fillWith: Char = '-'): String =
    bases.padTo(newSize, fillWith)
Sander Bollen's avatar
Sander Bollen committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62

  /**
   * Convert a scala List[Any] to a java ArrayList[Object]. This is necessary for BCF conversions.
   *
   * Elements of a List[Any] are already boxed at runtime (a scala Int is held as a
   * java.lang.Integer, a Long as a java.lang.Long, and so on), so every non-null
   * element can be handed to the java list as it is. Null elements are kept as null
   * instead of failing on a `toString` call.
   *
   * @param array scala List[Any]
   * @return converted java ArrayList[Object], same size and order as the input
   */
  def scalaListToJavaObjectArrayList(array: List[Any]): util.ArrayList[Object] = {
    val out = new util.ArrayList[Object]()

    array.foreach {
      case null        => out.add(null)
      // Matches every non-null value, boxed primitives included.
      case ref: AnyRef => out.add(ref)
    }
    out
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
77
  /**
   * Check whether two records are located at the same place and carry equal attributes.
   *
   * Only contig, start, end and attributes are compared; alleles and genotypes
   * are not looked at.
   *
   * TODO: Add genotype comparing to this function
   *
   * @param var1 first record
   * @param var2 second record
   * @return true when contig, start, end and attributes of both records are equal
   */
  def identicalVariantContext(var1: VariantContext, var2: VariantContext): Boolean = {
    var1.getContig == var2.getContig &&
      var1.getStart == var2.getStart &&
      var1.getEnd == var2.getEnd &&
      var1.getAttributes == var2.getAttributes
  }
Sander Bollen's avatar
Sander Bollen committed
84
85
86
87
88
89
90
91
92

  /**
   * Tell whether a header belongs to a block-type GVCF file.
   *
   * The decision is based on the presence of a "GVCFBlock" meta data line.
   *
   * @param header header of Vcf file
   * @return true if the header has a "GVCFBlock" meta data line, false otherwise
   */
  def isBlockGVcf(header: VCFHeader): Boolean =
    Option(header.getMetaDataLine("GVCFBlock")).isDefined
Peter van 't Hof's avatar
Peter van 't Hof committed
93
}