Commit 57b31904 authored by Sander Bollen
Browse files

Merge branch 'feature-fastq_filter' into 'develop'

Fix for BIOPET-402



See merge request !467
parents b6382747 942ad5e7
......@@ -31,6 +31,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.ExtractAlignedFastq,
nl.lumc.sasc.biopet.tools.FastqSplitter,
nl.lumc.sasc.biopet.tools.FastqSync,
nl.lumc.sasc.biopet.tools.FastqFilter,
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.FindOverlapMatch,
nl.lumc.sasc.biopet.tools.GvcfToBed,
......
package nl.lumc.sasc.biopet.tools
import java.io.File
import htsjdk.samtools.fastq.{ AsyncFastqWriter, BasicFastqWriter, FastqReader }
import nl.lumc.sasc.biopet.utils.ToolCommand
import scala.util.matching.Regex
import scala.collection.JavaConversions._
/**
 * Command line tool that copies a fastq file, optionally keeping only the
 * reads whose ID matches a regular expression.
 *
 * Created by pjvan_thof on 28-10-16.
 */
object FastqFilter extends ToolCommand {
  /**
   * Args for commandline program
   * @param inputFile input fastq file
   * @param outputFile output fastq file
   * @param idRegex when defined, only reads whose ID contains a match for this regex are kept;
   *                when empty, every read is kept
   */
  case class Args(inputFile: File = null,
                  outputFile: File = null,
                  idRegex: Option[Regex] = None) extends AbstractArgs

  /** Command line parser that fills an [[Args]] instance. */
  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputFile") required () valueName "<file>" action { (x, c) =>
      c.copy(inputFile = x)
    } text "Path to input file"
    opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(outputFile = x)
    } text "Path to output file"
    // The value is a regular expression, not a file, so the usage text says so.
    opt[String]("idRegex") unbounded () valueName "<regex>" action { (x, c) =>
      c.copy(idRegex = Some(x.r))
    } text "Regex to match ID"
  }

  /**
   * Reads the input fastq, writes the records that pass the ID filter to the
   * output fastq and logs how many reads were seen and kept.
   *
   * @param args raw command line arguments
   * @throws IllegalArgumentException when the arguments cannot be parsed
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    logger.info("Start")

    val reader = new FastqReader(cmdArgs.inputFile)
    // Nested try/finally: both resources are released even when reading,
    // matching or writing throws halfway through the file.
    try {
      val writer = new AsyncFastqWriter(new BasicFastqWriter(cmdArgs.outputFile), 10000)
      try {
        var total = 0
        var kept = 0
        for (record <- reader.iterator()) {
          // The ID is the read header up to (not including) the first space.
          val readId = record.getReadHeader.takeWhile(_ != ' ')
          // forall on an empty Option is true, so without a regex every read passes.
          if (cmdArgs.idRegex.forall(_.findFirstIn(readId).isDefined)) {
            writer.write(record)
            kept += 1
          }
          total += 1
          if (total % 100000 == 0) logger.info(s"Total reads: $total, reads left: $kept")
        }
        logger.info(s"Total reads: $total, reads left: $kept")
      } finally {
        writer.close()
      }
    } finally {
      reader.close()
    }

    logger.info("Done")
  }
}
@r01_filter hello
A
+
H
@r03_filter
G
+
H
@r01_filter hello
A
+
H
@r02
T
+
I
@r03_filter
G
+
H
@r04
C
+
I
@r05
A
+
H
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
 * Test for [[FastqFilter]]: runs the tool on a small fastq fixture and
 * compares the output with the expected filtered fixture.
 *
 * Created by ahbbollen on 27-8-15.
 */
class FastqFilterTest extends TestNGSuite with MockitoSugar with Matchers {
  import FastqFilter._

  /** Resolves a classpath resource to its path on the filesystem. */
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

  /** Reads all lines of a file and closes the underlying source afterwards. */
  private def readLines(path: String): List[String] = {
    val source = Source.fromFile(path)
    try source.getLines().toList
    finally source.close()
  }

  val preFilterFastq = resourcePath("/paired01_pre_filter.fq")
  val postFilterFastq = resourcePath("/paired01_post_filter.fq")

  // Explicit Unit return type: TestNG skips @Test methods that return a value
  // by default, so the result type must not depend on what `shouldBe` returns.
  @Test
  def testMain(): Unit = {
    val temp = File.createTempFile("out", ".fastq")
    temp.deleteOnExit()

    val args = Array("-I", preFilterFastq, "-o", temp.getAbsolutePath, "--idRegex", "_filter$")
    main(args)

    readLines(temp.getAbsolutePath) shouldBe readLines(postFilterFastq)
  }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment