ValidateAnnotation.scala 3.72 KB
Newer Older
pjvan_thof's avatar
pjvan_thof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/**
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
package nl.lumc.sasc.biopet.tools

import java.io.File

19
import nl.lumc.sasc.biopet.utils.annotation.Feature
20
import nl.lumc.sasc.biopet.utils.{AbstractOptParser, FastaUtils, ToolCommand}
21

pjvan_thof's avatar
pjvan_thof committed
22
23
24
25
26
27
import scala.io.Source

/**
  * Created by pjvanthof on 10/12/2016.
  */
object ValidateAnnotation extends ToolCommand {
28
  case class Args(refflatFile: Option[File] = None,
pjvan_thof's avatar
pjvan_thof committed
29
30
                  reference: File = null,
                  failOnError: Boolean = true,
31
                  gtfFiles: List[File] = Nil)
pjvan_thof's avatar
pjvan_thof committed
32

33
  class OptParser extends AbstractOptParser[Args](commandName) {
pjvan_thof's avatar
pjvan_thof committed
34
    opt[File]('r', "refflatFile") unbounded () maxOccurs 1 valueName "<file>" action { (x, c) =>
35
      c.copy(refflatFile = Some(x))
pjvan_thof's avatar
pjvan_thof committed
36
    } text "Refflat file to check"
pjvan_thof's avatar
pjvan_thof committed
37
    opt[File]('g', "gtfFile") unbounded () valueName "<file>" action { (x, c) =>
38
39
      c.copy(gtfFiles = x :: c.gtfFiles)
    } text "Gtf files to check"
pjvan_thof's avatar
pjvan_thof committed
40
41
42
    opt[File]('R', "reference") unbounded () required () maxOccurs 1 valueName "<file>" action {
      (x, c) =>
        c.copy(reference = x)
pjvan_thof's avatar
pjvan_thof committed
43
    } text "Reference fasta to check vcf file against"
pjvan_thof's avatar
pjvan_thof committed
44
    opt[Unit]("disableFail") unbounded () maxOccurs 1 valueName "<file>" action { (_, c) =>
pjvan_thof's avatar
pjvan_thof committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
      c.copy(failOnError = false)
    } text "Do not fail on error. The tool will still exit when encountering an error, but will do so with exit code 0"
  }

  def main(args: Array[String]): Unit = {
    logger.info("Start")

    val argsParser = new OptParser
    val cmdArgs
      : Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    val dict = FastaUtils.getCachedDict(cmdArgs.reference)

    try {

60
      val refflatLines = cmdArgs.refflatFile.map(Source.fromFile(_).getLines().toList.sorted)
pjvan_thof's avatar
pjvan_thof committed
61

62
      for (line <- refflatLines.getOrElse(Nil)) {
pjvan_thof's avatar
pjvan_thof committed
63
        val contig = line.split("\t")(2)
pjvan_thof's avatar
pjvan_thof committed
64
65
        require(dict.getSequence(contig) != null,
                s"Contig '$contig' found in refflat but not found on reference")
pjvan_thof's avatar
pjvan_thof committed
66
67
      }

68
      cmdArgs.gtfFiles.distinct.foreach { file =>
69
70
71
72
73
        refflatLines match {
          case Some(lines) =>
            val tempRefflat = File.createTempFile("temp.", ".refflat")
            tempRefflat.deleteOnExit()
            GtfToRefflat.gtfToRefflat(file, tempRefflat, Some(cmdArgs.reference))
74

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
            val tempRefflatLines = Source.fromFile(tempRefflat).getLines().toList.sorted
            for ((line1, line2) <- lines.zip(tempRefflatLines)) {
              require(line1 == line2, "Refflat and gtf contain different information")
            }
          case _ =>
            Source
              .fromFile(file)
              .getLines()
              .filter(!_.startsWith("#"))
              .map(Feature.fromLine)
              .foreach { feature =>
                require(
                  dict.getSequence(feature.contig) != null,
                  s"Contig '${feature.contig}' found in gtf/gff but not found on reference: $file")
              }
pjvan_thof's avatar
pjvan_thof committed
90
91
92
93
94
95
96
97
98
99
100
        }
      }
    } catch {
      case e: IllegalArgumentException =>
        if (cmdArgs.failOnError) throw e
        else logger.error(e.getMessage)
    }

    logger.info("Done")
  }
}