BedToInterval.scala 4.08 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
package nl.lumc.sasc.biopet.tools
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18 19
import java.io.{ File, PrintWriter }

Peter van 't Hof's avatar
Peter van 't Hof committed
20
import htsjdk.samtools.{ SAMSequenceRecord, SamReaderFactory }
21
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion }
bow's avatar
bow committed
23
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
24

25 26
import scala.io.Source

Peter van 't Hof's avatar
Peter van 't Hof committed
27 28 29
/**
 * @deprecated Use picard.util.BedToIntervalList instead
 */
30
class BedToInterval(val root: Configurable) extends ToolCommandFuntion {
31
  javaMainClass = getClass.getName
bow's avatar
bow committed
32 33

  @Input(doc = "Input Bed file", required = true)
34
  var input: File = _
bow's avatar
bow committed
35 36 37 38 39

  @Input(doc = "Bam File", required = true)
  var bamFile: File = _

  @Output(doc = "Output interval list", required = true)
40
  var output: File = _
bow's avatar
bow committed
41

Peter van 't Hof's avatar
Peter van 't Hof committed
42
  override val defaultCoreMemory = 1.0
bow's avatar
bow committed
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  override def commandLine = super.commandLine + required("-I", input) + required("-b", bamFile) + required("-o", output)
45 46
}

Peter van 't Hof's avatar
Peter van 't Hof committed
47 48 49
/**
 * @deprecated Use picard.util.BedToIntervalList instead
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
50
object BedToInterval extends ToolCommand {
bow's avatar
bow committed
51
  def apply(root: Configurable, inputBed: File, inputBam: File, output: File): BedToInterval = {
52 53 54 55
    val bedToInterval = new BedToInterval(root)
    bedToInterval.input = inputBed
    bedToInterval.bamFile = inputBam
    bedToInterval.output = output
Peter van 't Hof's avatar
Peter van 't Hof committed
56
    bedToInterval
57
  }
bow's avatar
bow committed
58

Peter van 't Hof's avatar
Peter van 't Hof committed
59
  case class Args(inputFile: File = null, outputFile: File = null, bamFile: File = null) extends AbstractArgs
Peter van 't Hof's avatar
Peter van 't Hof committed
60 61

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
62
    opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
63 64
      c.copy(inputFile = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
65
    opt[File]('o', "output") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
66 67
      c.copy(outputFile = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    opt[File]('b', "bam") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
69 70
      c.copy(bamFile = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
72

73 74 75 76
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
77 78
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
Peter van 't Hof's avatar
Peter van 't Hof committed
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
    val writer = new PrintWriter(commandArgs.outputFile)
bow's avatar
bow committed
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
    val inputSam = SamReaderFactory.makeDefault.open(commandArgs.bamFile)
83 84
    val refs = for (SQ <- inputSam.getFileHeader.getSequenceDictionary.getSequences.toArray) yield {
      val record = SQ.asInstanceOf[SAMSequenceRecord]
85
      writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n")
86
      record.getSequenceName -> record.getSequenceLength
87
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
88
    inputSam.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
89 90
    val refsMap = Map(refs: _*)

Peter van 't Hof's avatar
Peter van 't Hof committed
91
    val bedFile = Source.fromFile(commandArgs.inputFile)
92
    for (
Peter van 't Hof's avatar
Peter van 't Hof committed
93
      line <- bedFile.getLines();
Peter van 't Hof's avatar
Peter van 't Hof committed
94 95 96 97
      split = line.split("\t") if split.size >= 3;
      chr = split(0);
      start = split(1);
      stop = split(2) if start forall Character.isDigit if stop forall Character.isDigit
Peter van 't Hof's avatar
Peter van 't Hof committed
98
    ) {
99
      if (!refsMap.contains(chr)) throw new IllegalStateException("Chr '" + chr + "' in bed file not found in bam file")
100 101 102 103 104
      writer.write(chr + "\t" + start + "\t" + stop + "\t")
      if (split.length >= 6 && (split(5) == "+" || split(5) == "-")) writer.write(split(5))
      else {
        var strand = "+"
        for (t <- 3 until split.length) {
Peter van 't Hof's avatar
Peter van 't Hof committed
105
          if (split(t) == "+" || split(t) == "-") strand = split(t)
106 107 108 109 110 111 112
        }
        writer.write(strand)
      }
      writer.write("\t" + chr + ":" + start + "-" + stop)
      for (t <- 3 until split.length) writer.write(":" + split(t))
      writer.write("\n")
    }
bow's avatar
bow committed
113

114 115 116
    writer.close()
  }
}