PrefixFastq.scala 3.41 KB
Newer Older
1
2
3
4
package nl.lumc.sasc.biopet.tools

import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
5
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand }
6
import htsjdk.samtools.fastq.{ FastqRecord, AsyncFastqWriter, FastqReader, BasicFastqWriter }
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
8
import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
9
10
11
import scala.collection.JavaConversions._

/**
 * Queue class for PrefixFastq tool
 *
 * Created by pjvan_thof on 1/13/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
20
21
class PrefixFastq(val root: Configurable) extends BiopetJavaCommandLineFunction {
  // The Java main class is taken from this wrapper's runtime class name.
  // NOTE(review): presumably this resolves to the `main` of the companion object — confirm.
  javaMainClass = getClass.getName

  /** Fastq file whose reads get the prefix. */
  @Input(doc = "Input fastq", shortName = "I", required = true)
  var inputFastq: File = _

  /** Fastq file the prefixed reads are written to. */
  @Output(doc = "Output fastq", shortName = "o", required = true)
  var outputFastq: File = _

  /** Sequence that is put in front of every read. */
  @Argument(doc = "Prefix seq", required = true)
  var prefixSeq: String = _

  /**
   * Creates command to execute extension.
   *
   * The command consists of the superclass command line followed by the `-i`, `-o` and `-s`
   * options. All three are passed as required: `prefixSeq` is annotated `required = true`
   * and the tool's option parser rejects invocations without `-s`, so emitting it as an
   * optional argument could only produce a command line that fails at startup.
   *
   * @return the command line for this job
   */
  override def commandLine = super.commandLine +
    required("-i", inputFastq) +
    required("-o", outputFastq) +
    required("-s", prefixSeq)
}

38
object PrefixFastq extends ToolCommand {
  /**
   * Create a PrefixFastq class object with a suffix ".prefix.fastq" in the output folder
   *
   * The output file is named after the input file (`input.getName`) with ".prefix.fastq"
   * appended. The prefix sequence itself is not set here and must be assigned by the caller.
   *
   * @param root parent object
   * @param input input file
   * @param outputDir output folder
   * @return PrefixFastq class object
   */
  def apply(root: Configurable, input: File, outputDir: String): PrefixFastq = {
    val prefixFastq = new PrefixFastq(root)
    prefixFastq.inputFastq = input
    prefixFastq.outputFastq = new File(outputDir, input.getName + ".prefix.fastq")
    prefixFastq
  }

  /**
   * Args for commandline program
   * @param input input fastq file (can be zipped)
   * @param output output fastq file (can be zipped)
   * @param seq Seq to prefix the reads with
   */
  case class Args(input: File = null, output: File = null, seq: String = null) extends AbstractArgs

  /** Command line parser; `-i`, `-o` and `-s` are all required and may be given at most once. */
  class OptParser extends AbstractOptParser {
    opt[File]('i', "input") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(input = x)
    }
    opt[File]('o', "output") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
      c.copy(output = x)
    }
    opt[String]('s', "seq") required () maxOccurs (1) valueName ("<prefix seq>") action { (x, c) =>
      c.copy(seq = x)
    }
  }

  /**
   * Program will prefix reads with a given seq
   *
   * Every read from the input fastq is written to the output fastq with the prefix sequence
   * put in front of its bases. The qualities of the added bases are set to the highest
   * quality character found in that read. Progress is logged every million reads.
   *
   * Exits with status 1 when the command line arguments cannot be parsed.
   *
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    logger.info("Start")

    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)

    // Number of reads between two progress messages.
    val logInterval = 1000000

    val writer = new AsyncFastqWriter(new BasicFastqWriter(cmdArgs.output), 3000)
    // try/finally makes sure both files are closed, also when reading or writing fails halfway.
    try {
      val reader = new FastqReader(cmdArgs.input)
      try {
        var counter = 0
        while (reader.hasNext) {
          val read = reader.next()

          // Highest quality character of this read, used as quality for the prefixed bases.
          // NOTE(review): `max` throws on an empty quality string, so a read without any
          // bases will abort the run — confirm whether such reads can occur in the input.
          val maxQuality = read.getBaseQualityString.max

          val readHeader = read.getReadHeader
          val readSeq = cmdArgs.seq + read.getReadString
          val baseQualityHeader = read.getBaseQualityHeader
          val baseQuality = Array.fill(cmdArgs.seq.size)(maxQuality).mkString + read.getBaseQualityString

          writer.write(new FastqRecord(readHeader, readSeq, baseQualityHeader, baseQuality))

          counter += 1
          if (counter % logInterval == 0) logger.info(counter + " reads processed")
        }

        // Report the final count, unless it was just reported by the progress message.
        if (counter % logInterval != 0) logger.info(counter + " reads processed")
      } finally {
        reader.close()
      }
    } finally {
      writer.close()
    }
    logger.info("Done")
  }
}