Commit 6e3aabd0 authored by bow's avatar bow
Browse files

Merge branch 'feature-improve_fastqSplitter' into 'develop'

Feature improve fastq splitter

Fix for #97

Also improved code

See merge request !66
parents a9d60948 50feeba1
......@@ -15,14 +15,18 @@
*/
package nl.lumc.sasc.biopet.tools
import java.io.{ BufferedInputStream, File, FileInputStream, PrintWriter }
import java.util.zip.GZIPInputStream
import java.io.File
import htsjdk.samtools.fastq.{ AsyncFastqWriter, FastqReader, BasicFastqWriter }
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import scala.io.Source
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.JavaConversions._
/**
* Queue extension for the FastqSplitter
* @param root Parent object
*/
class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
......@@ -35,10 +39,20 @@ class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunctio
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
/**
* Generate command to execute
* @return
*/
override def commandLine = super.commandLine + required("-I", input) + repeat("-o", output)
}
object FastqSplitter extends ToolCommand {
/**
* Arg for commandline program
* @param inputFile input fastq file
* @param outputFile output fastq files
*/
case class Args(inputFile: File = null, outputFile: List[File] = Nil) extends AbstractArgs
class OptParser extends AbstractOptParser {
......@@ -51,32 +65,32 @@ object FastqSplitter extends ToolCommand {
}
/**
* Program will split fastq file in multiple fastq files
*
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val groupsize = 100
val output = for (file <- commandArgs.outputFile) yield new PrintWriter(file)
val inputStream = {
if (commandArgs.inputFile.getName.endsWith(".gz") || commandArgs.inputFile.getName.endsWith(".gzip")) Source.fromInputStream(
new GZIPInputStream(
new BufferedInputStream(
new FileInputStream(commandArgs.inputFile)))).bufferedReader
else Source.fromFile(commandArgs.inputFile).bufferedReader
}
while (inputStream.ready) {
for (writter <- output) {
for (t <- 1 to groupsize) {
for (t <- 1 to (4)) {
if (inputStream.ready) {
writter.write(inputStream.readLine + "\n")
}
}
val groupSize = 100
val output = for (file <- commandArgs.outputFile) yield new AsyncFastqWriter(new BasicFastqWriter(file), groupSize)
val reader = new FastqReader(commandArgs.inputFile)
logger.info("Starting to split fatsq file: " + commandArgs.inputFile)
logger.info("Output files: " + commandArgs.outputFile.mkString(", "))
var counter: Long = 0
while (reader.hasNext) {
for (writer <- output) {
for (t <- 1 to groupSize if reader.hasNext) {
writer.write(reader.next())
counter += 1
if (counter % 1000000 == 0) logger.info(counter + " reads processed")
}
}
}
for (writter <- output) writter.close
for (writer <- output) writer.close
logger.info("Done, " + counter + " reads processed")
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment